From 3fb30fbac22b6a05a6c9888bcbb626e54981cada Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 28 Feb 2025 14:41:24 -0500 Subject: [PATCH 01/45] [ENH] Start adding zip file importer (and move XNAT classes) --- datman/importers.py | 941 ++++++++++++++++++++++++++++++++++++++++++++ datman/xnat.py | 645 +----------------------------- 2 files changed, 944 insertions(+), 642 deletions(-) create mode 100644 datman/importers.py diff --git a/datman/importers.py b/datman/importers.py new file mode 100644 index 00000000..f6d99628 --- /dev/null +++ b/datman/importers.py @@ -0,0 +1,941 @@ +"""Input formats that datman can use to read new data. + +This file contains classes for reading in data that is _new_ to datman. Datman +uses these classes to create a uniform interface for its exporters, which +create the files and database contents users may actually interact with. +""" +from abc import ABC, abstractmethod +import glob +import json +import logging +import os +import re +import shutil +from zipfile import ZipFile + +from datman.exceptions import ParseException, XnatException, InputException +from datman.utils import is_dicom, get_archive_headers + + +logger = logging.getLogger(__name__) + + +class SessionImporter(ABC): + + # Exporters currently use these from XNATExperiment: + # experiment.name + # experiment.source_name (related to sharing data) + # experiment.scans + # experiment.date + # experiment.is_shared() + + # Missed but possibly needed attributes (from extract): + # experiment.assign_scan_names(config, ident) + # + # Maybe we really just need a resource exporter class... + # experiment.resource_files (list of dicts) + # experiment.resource_IDs (dict of folder names to numerical IDs) + # e.g. {'behav': '297528', 'misc': '305312'} + + @property + @abstractmethod + def name(self) -> str: + """A valid ID for the scan session being imported. 
+ """ + pass + + @property + @abstractmethod + def source_name(self) -> str: + """The original ID of a scan session shared from another project. + + If the session currently being imported originates from another + project, 'name' is the session's ID in the new project and source_name + corresponds to it's original ID. This will be equal to 'name' when + the session is not shared or sharing is not being tracked. + """ + pass + + @property + @abstractmethod + def date(self) -> str: + """A string representation (YYYY-MM-DD) of the scan collection date. + """ + pass + + @property + @abstractmethod + def scans(self) -> list['SeriesImporter']: + """A list scan series that belong to the session. + """ + pass + + @abstractmethod + def is_shared(self) -> bool: + """Indicates whether the session is shared with other projects. + """ + pass + + +class SeriesImporter(ABC): + # XNATScan attributes and methods used by exporters... + # .series + # .subject (FakeSideCar needs) + # .names + # .description + + # MISSED (may have missed more in dm_xnat_extract): + # scan.download_dir + # xnat copy for example points to: /scratch/dawn/temp_stuff/export_zip/xnat_copy/SPN10_CMH_0083_01_SE01_MR/scans/6-t1_mprage_T1_900/resources/DICOM/files + # unzipped copy would be (diff session): 20190116_Ex09352_ASND1MR_ASQB002/Ex09352_Se00003_SagT1Bravo-1mm-32ch/ + + @property + @abstractmethod + def series(self) -> str: + """A string representation of the series 'number' + + This should be a string because sometimes the 'number' comes with + non-numeric prefixes or postfixes (e.g. on XNAT in some circumstances). + """ + pass + + @property + @abstractmethod + def subject(self) -> str: + """The subject ID of the session this scan belongs to. + + The subject ID may vary from the SessionImporter.name (i.e. a + truncated or extended version of it as subject may be to experiment + on XNAT). 
+ """ + pass + + @property + @abstractmethod + def description(self) -> str: + """The series description (as from the dicom headers). + """ + + @property + @abstractmethod + def names(self) -> list[str]: + """A list of valid scan names that may be applied to this series. + """ + pass + + +############################################################################### +#### XNAT classes, formerly in xnat.py + + +class XNATObject(ABC): + def _get_field(self, key): + if not self.raw_json.get("data_fields"): + return "" + return self.raw_json["data_fields"].get(key, "") + + +class XNATSubject(XNATObject): + def __init__(self, subject_json): + self.raw_json = subject_json + self.name = self._get_field("label") + self.project = self._get_field("project") + self.experiments = self._get_experiments() + + def _get_experiments(self): + experiments = [ + exp for exp in self.raw_json["children"] + if exp["field"] == "experiments/experiment" + ] + + if not experiments: + logger.debug(f"No experiments found for {self.name}") + return {} + + found = {} + for item in experiments[0]["items"]: + exper = XNATExperiment(self.project, self.name, item) + found[exper.name] = exper + + return found + + def __str__(self): + return f"" + + def __repr__(self): + return self.__str__() + + +class XNATExperiment(SessionImporter, XNATObject): + def __init__(self, project, subject_name, experiment_json): + self.raw_json = experiment_json + self.project = project + self.subject = subject_name + self.uid = self._get_field("UID") + self.id = self._get_field("ID") + self.date = self._get_field("date") + + if self.is_shared(): + self.name = [label for label in self.get_alt_labels() + if self.subject in label][0] + self.source_name = self._get_field("label") + else: + self.name = self._get_field("label") + self.source_name = self.name + + # Scan attributes + self.scans = self._get_scans() + self.scan_UIDs = self._get_scan_UIDs() + self.scan_resource_IDs = self._get_scan_rIDs() + + # Resource attributes 
+ self.resource_files = self._get_contents("resources/resource") + self.resource_IDs = self._get_resource_IDs() + + # Misc - basically just OPT CU1 needs this + self.misc_resource_IDs = self._get_other_resource_IDs() + + # Use properties here to conform with SessionImporter interface + # and guarantee at creation that expected attributes exist + @property + def name(self) -> str: + return self._name + + @name.setter + def name(self, value: str): + self._name = value + + @property + def source_name(self) -> str: + return self._source_name + + @source_name.setter + def source_name(self, value: str): + self._source_name = value + + @property + def scans(self) -> list['XNATScan']: + return self._scans + + @scans.setter + def scans(self, value: list['XNATScan']): + self._scans = value + + @property + def date(self) -> str: + return self._date + + @date.setter + def date(self, value: str): + self._date = value + + def _get_contents(self, data_type): + children = self.raw_json.get("children", []) + + contents = [ + child["items"] for child in children if child["field"] == data_type + ] + return contents + + def _get_scans(self): + scans = self._get_contents("scans/scan") + if not scans: + logger.debug(f"No scans found for experiment {self.name}") + return scans + xnat_scans = [] + for scan_json in scans[0]: + xnat_scans.append(XNATScan(self, scan_json)) + return xnat_scans + + def _get_scan_UIDs(self): + return [scan.uid for scan in self.scans] + + def _get_scan_rIDs(self): + # These can be used to download a series from xnat + resource_ids = [] + for scan in self.scans: + for child in scan.raw_json["children"]: + if child["field"] != "file": + continue + for item in child["items"]: + try: + label = item["data_fields"]["label"] + except KeyError: + continue + if label != "DICOM": + continue + r_id = item["data_fields"]["xnat_abstractresource_id"] + resource_ids.append(str(r_id)) + return resource_ids + + def _get_resource_IDs(self): + if not self.resource_files: + return 
{} + + resource_ids = {} + for resource in self.resource_files[0]: + label = resource["data_fields"].get("label", "No Label") + resource_ids[label] = str( + resource["data_fields"]["xnat_abstractresource_id"]) + return resource_ids + + def _get_other_resource_IDs(self): + """ + OPT's CU site uploads niftis to their server. These niftis are neither + classified as resources nor as scans so our code misses them entirely. + This functions grabs the abstractresource_id for these and + any other unique files aside from snapshots so they can be downloaded + """ + r_ids = [] + for scan in self.scans: + for child in scan.raw_json["children"]: + for file_upload in child["items"]: + data_fields = file_upload["data_fields"] + try: + label = data_fields["label"] + except KeyError: + # Some entries don't have labels. Only hold some header + # values. These are safe to ignore + continue + + try: + data_format = data_fields["format"] + except KeyError: + # Some entries have labels but no format... or neither + if not label: + # If neither, ignore. Should just be an entry + # containing scan parameters, etc. + continue + data_format = label + + try: + r_id = str(data_fields["xnat_abstractresource_id"]) + except KeyError: + # Some entries have labels and/or a format but no + # actual files and so no resource id. These can also be + # safely ignored. + continue + + # ignore DICOM, it's grabbed elsewhere. Ignore snapshots + # entirely. Some things may not be labelled DICOM but may + # be format 'DICOM' so that needs to be checked for too. + if label != "DICOM" and (data_format != "DICOM" + and label != "SNAPSHOTS"): + r_ids.append(r_id) + return r_ids + + def get_autorun_ids(self, xnat): + """Find the ID(s) of the 'autorun.xml' workflow + + XNAT has this obnoxious, on-by-default and seemingly impossible to + disable, 'workflow' called AutoRun.xml. 
It appears to do nothing other + than prevent certain actions (like renaming subjects/experiments) if + it is stuck in the running or queued state. This will grab the autorun + ID for this experiment so that it can be modified. + + Sometimes more than one pipeline gets launched for a subject even + though the GUI only reports one. This will grab the ID for all of them. + + Returns: + list: A list of string reference IDs that can be used to change + the status of the pipeline for this subject using XNAT's API, + or the empty string if the pipeline is not found. + + Raises: + XnatException: If no AutoRun.xml pipeline instance is found or + the API response can't be parsed. + """ + query_xml = """ + + wrk:workflowData + + wrk:workflowData + pipeline_name + 0 + string + wrk:workflowData/pipeline_name + + + wrk:workflowData + wrk_workflowData_id + 1 + string + wrk:workflowData/wrk_workflowData_id + + + + wrk:workflowData/ID + LIKE + {exp_id} + + + wrk:workflowData/ExternalID + = + {project} + + + wrk:workflowData/pipeline_name + = + xnat_tools/AutoRun.xml + + + + """.format(exp_id=self.id, project=self.project) # noqa: E501 + + query_url = f"{xnat.server}/data/search?format=json" + response = xnat._make_xnat_post(query_url, data=query_xml) + + if not response: + raise XnatException("AutoRun.xml pipeline not found.") + + try: + found_pipelines = json.loads(response) + except json.JSONDecodeError: + raise XnatException("Can't decode workflow query response.") + + try: + results = found_pipelines["ResultSet"]["Result"] + except KeyError: + return [] + + wf_ids = [item.get("workflow_id") for item in results] + + return wf_ids + + def get_resources(self, xnat_connection): + """ + Returns a list of all resource URIs from this session. 
+ """ + resources = [] + resource_ids = list(self.resource_IDs.values()) + resource_ids.extend(self.misc_resource_IDs) + for r_id in resource_ids: + resource_list = xnat_connection.get_resource_list( + self.project, self.subject, self.name, r_id) + resources.extend([item["URI"] for item in resource_list]) + return resources + + def download(self, xnat, dest_folder, zip_name=None): + """ + Download a zip file containing all data for this session. Returns the + path to the new file if download is successful, raises an exception if + not + + Args: + xnat: An instance of datman.xnat.xnat() + dest_folder: The absolute path to the folder where the zip + should be deposited + zip_name: An optional name for the output zip file. If not + set the zip name will be session.name + + """ + resources_list = list(self.scan_resource_IDs) + resources_list.extend(self.misc_resource_IDs) + resources_list.extend(self.resource_IDs) + + if not resources_list: + raise ValueError(f"No scans or resources found for {self.name}") + + url = (f"{xnat.server}/REST/experiments/{self.id}/resources/" + f"{','.join(resources_list)}/files?structure=improved" + "&all=true&format=zip") + + if not zip_name: + zip_name = self.name.upper() + ".zip" + + output_path = os.path.join(dest_folder, zip_name) + if os.path.exists(output_path): + logger.error( + f"Cannot download {output_path}, file already exists.") + return output_path + + xnat._get_xnat_stream(url, output_path) + + return output_path + + def assign_scan_names(self, config, ident): + """Assign a datman style name to each scan in this experiment. + + This will populate the XnatScan.names and XnatScan.tags fields + for any scan that matches the study's export configuration. + + Args: + config (:obj:`datman.config.config`): A config object for the + study this experiment belongs to. + ident (:obj:`datman.scanid.Identifier`): A valid ID to apply + to this experiment's data. 
+ """ + tags = config.get_tags(site=ident.site) + if not tags.series_map: + logger.error( + f"Failed to get tag export info for study {config.study_name}" + f" and site {ident.site}") + return + + for scan in self.scans: + try: + scan.set_datman_name(str(ident), tags) + except Exception as e: + logger.info( + f"Failed to make file name for series {scan.series} " + f"in session {str(ident)}. Reason {type(e).__name__}: " + f"{e}") + + def is_shared(self) -> bool: + """Check if the experiment is shared from another project. + """ + alt_names = self.get_alt_labels() + if not alt_names: + return False + + return any([self.subject in label for label in alt_names]) + + def get_alt_labels(self): + """Find the names for all shared copies of the XNAT experiment. + """ + shared = self._get_contents("sharing/share") + if not shared: + return [] + return [item['data_fields']['label'] for item in shared[0]] + + def __str__(self): + return f"" + + def __repr__(self): + return self.__str__() + + +class XNATScan(SeriesImporter, XNATObject): + def __init__(self, experiment, scan_json): + self.project = experiment.project + self.subject = experiment.subject + self.experiment = experiment.name + self.shared = experiment.is_shared() + self.source_experiment = experiment.source_name + self.raw_json = scan_json + self.uid = self._get_field("UID") + self.series = self._get_field("ID") + self.image_type = self._get_field("parameters/imageType") + self.multiecho = self.is_multiecho() + self.description = self._set_description() + self.type = self._get_field("type") + self.names = [] + self.tags = [] + self.download_dir = None + + # Use properties here to conform with SeriesImporter interface + # and guarantee at creation that expected attributes exist + @property + def series(self) -> str: + return self._series + + @series.setter + def series(self, value: str): + self._series = value + + @property + def subject(self) -> str: + return self._subject + + @subject.setter + def subject(self, 
value: str): + self._subject = value + + @property + def description(self) -> str: + return self._description + + @description.setter + def description(self, value: str): + self._description = value + + @property + def names(self) -> list[str]: + return self._names + + @names.setter + def names(self, value: list[str]): + self._names = value + + def _set_description(self): + series_descr = self._get_field("series_description") + if series_descr: + return series_descr + return self._get_field("type") + + def is_multiecho(self): + try: + child = self.raw_json["children"][0]["items"][0] + except (KeyError, IndexError): + return False + name = child["data_fields"].get("name") + if name and "MultiEcho" in name: + return True + return False + + def raw_dicoms_exist(self): + for child in self.raw_json["children"]: + for item in child["items"]: + file_type = item["data_fields"].get("content") + if file_type == "RAW": + return True + return False + + def is_derived(self): + if not self.image_type: + logger.warning( + f"Image type could not be found for series {self.series}. 
" + "Assuming it's not derived.") + return False + if "DERIVED" in self.image_type: + return True + return False + + def set_tag(self, tag_map): + matches = {} + for tag, pattern in tag_map.items(): + + if 'SeriesDescription' in pattern: + regex = pattern['SeriesDescription'] + search_target = self.description + elif 'XnatType' in pattern: + regex = pattern['XnatType'] + search_target = self.type + else: + raise KeyError( + "Missing keys 'SeriesDescription' or 'XnatType'" + " for Pattern!") + + if isinstance(regex, list): + regex = "|".join(regex) + if re.search(regex, search_target, re.IGNORECASE): + matches[tag] = pattern + + if len(matches) == 1 or (len(matches) == 2 and self.multiecho): + self.tags = list(matches.keys()) + return matches + return self._set_fmap_tag(tag_map, matches) + + def _set_fmap_tag(self, tag_map, matches): + try: + for tag, pattern in tag_map.items(): + if tag in matches: + if not re.search(pattern["ImageType"], self.image_type): + del matches[tag] + except Exception: + matches = {} + + if len(matches) > 2 or (len(matches) == 2 and not self.multiecho): + matches = {} + self.tags = list(matches.keys()) + return matches + + def set_datman_name(self, base_name, tags): + mangled_descr = self._mangle_descr() + padded_series = self.series.zfill(2) + tag_settings = self.set_tag(tags.series_map) + if not tag_settings: + raise ParseException( + f"Can't identify tag for series {self.series}") + names = [] + self.echo_dict = {} + for tag in tag_settings: + name = "_".join([base_name, tag, padded_series, mangled_descr]) + if self.multiecho: + echo_num = tag_settings[tag]["EchoNumber"] + if echo_num not in self.echo_dict: + self.echo_dict[echo_num] = name + names.append(name) + + if len(self.tags) > 1 and not self.multiecho: + logger.error(f"Multiple export patterns match for {base_name}, " + f"descr: {self.description}, tags: {self.tags}") + names = [] + self.tags = [] + + self.names = names + return names + + def _mangle_descr(self): + if not 
self.description: + return "" + return re.sub(r"[^a-zA-Z0-9.+]+", "-", self.description) + + def is_usable(self, strict=False): + if not self.raw_dicoms_exist(): + logger.debug(f"Ignoring {self.series} for {self.experiment}. " + f"No RAW dicoms exist.") + return False + + if not self.description: + logger.error(f"Can't find description for series {self.series} " + f"from session {self.experiment}.") + return False + + if not strict: + return True + + if self.is_derived(): + logger.debug( + f"Series {self.series} in session {self.experiment} is a " + "derived scan. Ignoring.") + return False + + if not self.names: + return False + + return True + + def download(self, xnat_conn, output_dir): + """Download all dicoms for this series. + + This will download all files in the series, and if successful, + set the download_dir attribute to the destination folder. + + Args: + xnat_conn (:obj:`datman.xnat.xnat`): An open xnat connection + to the server to download from. + output_dir (:obj:`str`): The full path to the location to + download all files to. + + Returns: + bool: True if the series was downloaded, False otherwise. + """ + logger.info(f"Downloading dicoms for {self.experiment} series: " + f"{self.series}.") + + if self.download_dir: + logger.debug( + "Data has been previously downloaded, skipping redownload.") + return True + + try: + dicom_zip = xnat_conn.get_dicom(self.project, self.subject, + self.experiment, self.series) + except Exception as e: + logger.error(f"Failed to download dicom archive for {self.subject}" + f" series {self.series}. Reason - {e}") + return False + + if os.path.getsize(dicom_zip) == 0: + logger.error( + f"Server returned an empty file for series {self.series} in " + f"session {self.experiment}. This may be a server error." 
+ ) + os.remove(dicom_zip) + return False + + logger.info(f"Unpacking archive {dicom_zip}") + + try: + with ZipFile(dicom_zip, "r") as fh: + fh.extractall(output_dir) + except Exception as e: + logger.error("An error occurred unpacking dicom archive for " + f"{self.experiment}'s series {self.series}' - {e}") + os.remove(dicom_zip) + return False + else: + logger.info("Unpacking complete. Deleting archive file " + f"{dicom_zip}") + os.remove(dicom_zip) + + if self.shared: + self._fix_download_name(output_dir) + + dicom_file = self._find_first_dicom(output_dir) + + try: + self.download_dir = os.path.dirname(dicom_file) + except TypeError: + logger.warning("No valid dicom files found in XNAT session " + f"{self.subject} series {self.series}.") + return False + return True + + def _find_first_dicom(self, download_dir): + """Finds a dicom from the series (if any) in the given directory. + + Args: + download_dir (:obj:`str`): The directory to search for dicoms. + + Returns: + str: The full path to a dicom, or None if no readable dicoms + exist in the folder. + """ + search_dir = self._find_series_dir(download_dir) + for root_dir, folder, files in os.walk(search_dir): + for item in files: + path = os.path.join(root_dir, item) + if is_dicom(path): + return path + + def _find_series_dir(self, search_dir): + """Find the directory a series was downloaded to, if any. + + If multiple series are downloaded to the same temporary directory + this will search for the expected downloaded path of this scan. + + Args: + search_dir (:obj:`str`): The full path to a directory to search. + + Returns: + str: The full path to this scan's download location. 
+ """ + expected_path = os.path.join(search_dir, self.experiment, "scans") + found = glob.glob(os.path.join(expected_path, f"{self.series}-*")) + if not found: + return search_dir + if not os.path.exists(found[0]): + return search_dir + return found[0] + + def _fix_download_name(self, output_dir): + """Rename a downloaded XNAT-shared scan to match the expected label. + """ + orig_dir = os.path.join(output_dir, self.source_experiment) + try: + os.rename(orig_dir, + orig_dir.replace( + self.source_experiment, + self.experiment)) + except OSError: + for root, dirs, _ in os.walk(orig_dir): + for item in dirs: + try: + os.rename(os.path.join(root, item), + os.path.join( + root.replace( + self.source_experiment, + self.experiment), + item) + ) + except OSError: + pass + else: + shutil.rmtree(orig_dir) + return + + def __str__(self): + return f"" + + def __repr__(self): + return self.__str__() + + +############################################################################# +# Zip file classes + + +class ZipImporter(SessionImporter): + + def __init__(self, zip_path): + self.path = zip_path + self.name = zip_path + + # Does this need exception handling? Or allow calling class + # to do it? + headers = get_archive_headers(zip_path) + # Headers = dict[rel_path -> pydicom.dataset.FileDataset] + contents = {} + for path in headers: + dicom = headers[path] + # only need one date... but confirm all match? Or grab after + # constructing scan objects? + # Can also use AcquisitionDate, SeriesDate (?) 
+ date = dicom.get('StudyDate') + series_description = dicom.get('SeriesDescription') + series = dicom.get('SeriesNumber') + contents[path] = { + 'date': date, + 'description': series_description, + 'series': series + } + # Still need to construct the ZipSeriesImporter class + # and also a way of assigning names like + # experiment.assign_scan_names(config, ident) so truly interchangeable + + # Use properties here to conform with SessionImporter interface + # and guarantee at creation that expected attributes exist + @property + def name(self) -> str: + return self._name + + @name.setter + def name(self, value: str): + self._name, _ = os.path.splitext(os.path.basename(value)) + + @property + def source_name(self) -> str: + # When using zip files, can't really track shared IDs so always + # equal name. + return self.name + + @source_name.setter + def source_name(self, value: str): + self.name = value + + @property + def scans(self) -> list['XNATScan']: + return self._scans + + @scans.setter + def scans(self, value: list['XNATScan']): + self._scans = value + + # @property + # def date(self) -> str: + # return self._date + + # @date.setter + # def date(self, value: str): + # self._date = value + + def is_shared(self) -> bool: + # Can't track shared sessions with zip files. + return False + + def extract(self, dest_path: str) -> str: + """Unpack the zip file at the given location. + + Args: + dest_path (str): The full path to the location to extract into. + + Returns: + list, list: A list of paths to each series' folder and a list + of paths to non-scan files bundled with the session. + """ + ##### May want to update this later to only extract series as needed + ##### but to grab all the folders and file info from the zip file + ##### before extract (I think we can read dicom headers in utils already) + + + with ZipFile(self.path, "r") as fh: + # Scan zips contain parent folder that holds all scan data. + # Grab it before extracting contents. 
+ par_info = fh.filelist[0] + if par_info.is_dir(): + scan_dir = os.path.join(dest_path, par_info.filename) + else: + raise InputException("Malformed scan zip folder.") + fh.extractall(dest_path) + + scans = [] + resources = [] + for item in glob.glob(os.path.join(scan_dir, "*")): + if os.path.isdir(item): + scans.append(item) + else: + resources.append(item) + + return scans, resources + + +def ZipSeriesImporter(SeriesImporter): diff --git a/datman/xnat.py b/datman/xnat.py index 0d00184f..e2bbbca9 100644 --- a/datman/xnat.py +++ b/datman/xnat.py @@ -1,23 +1,18 @@ """Module to interact with the xnat server""" import getpass -import glob -import json import logging import os -import re import tempfile import time -import shutil import urllib.parse -from abc import ABC from xml.etree import ElementTree -from zipfile import ZipFile import requests -from datman.exceptions import UndefinedSetting, XnatException, ParseException -from datman.utils import is_dicom +from datman.exceptions import UndefinedSetting, XnatException +from datman.importers import XNATSubject, XNATExperiment, XNATScan + logger = logging.getLogger(__name__) @@ -1274,637 +1269,3 @@ def __str__(self): def __repr__(self): return self.__str__() - - -class XNATObject(ABC): - def _get_field(self, key): - if not self.raw_json.get("data_fields"): - return "" - return self.raw_json["data_fields"].get(key, "") - - -class XNATSubject(XNATObject): - def __init__(self, subject_json): - self.raw_json = subject_json - self.name = self._get_field("label") - self.project = self._get_field("project") - self.experiments = self._get_experiments() - - def _get_experiments(self): - experiments = [ - exp for exp in self.raw_json["children"] - if exp["field"] == "experiments/experiment" - ] - - if not experiments: - logger.debug(f"No experiments found for {self.name}") - return {} - - found = {} - for item in experiments[0]["items"]: - exper = XNATExperiment(self.project, self.name, item) - found[exper.name] = exper - - 
return found - - def __str__(self): - return f"" - - def __repr__(self): - return self.__str__() - - -class XNATExperiment(XNATObject): - def __init__(self, project, subject_name, experiment_json): - self.raw_json = experiment_json - self.project = project - self.subject = subject_name - self.uid = self._get_field("UID") - self.id = self._get_field("ID") - self.date = self._get_field("date") - - if self.is_shared(): - self.name = [label for label in self.get_alt_labels() - if self.subject in label][0] - self.source_name = self._get_field("label") - else: - self.name = self._get_field("label") - self.source_name = self.name - - # Scan attributes - self.scans = self._get_scans() - self.scan_UIDs = self._get_scan_UIDs() - self.scan_resource_IDs = self._get_scan_rIDs() - - # Resource attributes - self.resource_files = self._get_contents("resources/resource") - self.resource_IDs = self._get_resource_IDs() - - # Misc - basically just OPT CU1 needs this - self.misc_resource_IDs = self._get_other_resource_IDs() - - def _get_contents(self, data_type): - children = self.raw_json.get("children", []) - - contents = [ - child["items"] for child in children if child["field"] == data_type - ] - return contents - - def _get_scans(self): - scans = self._get_contents("scans/scan") - if not scans: - logger.debug(f"No scans found for experiment {self.name}") - return scans - xnat_scans = [] - for scan_json in scans[0]: - xnat_scans.append(XNATScan(self, scan_json)) - return xnat_scans - - def _get_scan_UIDs(self): - return [scan.uid for scan in self.scans] - - def _get_scan_rIDs(self): - # These can be used to download a series from xnat - resource_ids = [] - for scan in self.scans: - for child in scan.raw_json["children"]: - if child["field"] != "file": - continue - for item in child["items"]: - try: - label = item["data_fields"]["label"] - except KeyError: - continue - if label != "DICOM": - continue - r_id = item["data_fields"]["xnat_abstractresource_id"] - 
resource_ids.append(str(r_id)) - return resource_ids - - def _get_resource_IDs(self): - if not self.resource_files: - return {} - - resource_ids = {} - for resource in self.resource_files[0]: - label = resource["data_fields"].get("label", "No Label") - resource_ids[label] = str( - resource["data_fields"]["xnat_abstractresource_id"]) - return resource_ids - - def _get_other_resource_IDs(self): - """ - OPT's CU site uploads niftis to their server. These niftis are neither - classified as resources nor as scans so our code misses them entirely. - This functions grabs the abstractresource_id for these and - any other unique files aside from snapshots so they can be downloaded - """ - r_ids = [] - for scan in self.scans: - for child in scan.raw_json["children"]: - for file_upload in child["items"]: - data_fields = file_upload["data_fields"] - try: - label = data_fields["label"] - except KeyError: - # Some entries don't have labels. Only hold some header - # values. These are safe to ignore - continue - - try: - data_format = data_fields["format"] - except KeyError: - # Some entries have labels but no format... or neither - if not label: - # If neither, ignore. Should just be an entry - # containing scan parameters, etc. - continue - data_format = label - - try: - r_id = str(data_fields["xnat_abstractresource_id"]) - except KeyError: - # Some entries have labels and/or a format but no - # actual files and so no resource id. These can also be - # safely ignored. - continue - - # ignore DICOM, it's grabbed elsewhere. Ignore snapshots - # entirely. Some things may not be labelled DICOM but may - # be format 'DICOM' so that needs to be checked for too. - if label != "DICOM" and (data_format != "DICOM" - and label != "SNAPSHOTS"): - r_ids.append(r_id) - return r_ids - - def get_autorun_ids(self, xnat): - """Find the ID(s) of the 'autorun.xml' workflow - - XNAT has this obnoxious, on-by-default and seemingly impossible to - disable, 'workflow' called AutoRun.xml. 
It appears to do nothing other - than prevent certain actions (like renaming subjects/experiments) if - it is stuck in the running or queued state. This will grab the autorun - ID for this experiment so that it can be modified. - - Sometimes more than one pipeline gets launched for a subject even - though the GUI only reports one. This will grab the ID for all of them. - - Returns: - list: A list of string reference IDs that can be used to change - the status of the pipeline for this subject using XNAT's API, - or the empty string if the pipeline is not found. - - Raises: - XnatException: If no AutoRun.xml pipeline instance is found or - the API response can't be parsed. - """ - query_xml = """ - - wrk:workflowData - - wrk:workflowData - pipeline_name - 0 - string - wrk:workflowData/pipeline_name - - - wrk:workflowData - wrk_workflowData_id - 1 - string - wrk:workflowData/wrk_workflowData_id - - - - wrk:workflowData/ID - LIKE - {exp_id} - - - wrk:workflowData/ExternalID - = - {project} - - - wrk:workflowData/pipeline_name - = - xnat_tools/AutoRun.xml - - - - """.format(exp_id=self.id, project=self.project) # noqa: E501 - - query_url = f"{xnat.server}/data/search?format=json" - response = xnat._make_xnat_post(query_url, data=query_xml) - - if not response: - raise XnatException("AutoRun.xml pipeline not found.") - - try: - found_pipelines = json.loads(response) - except json.JSONDecodeError: - raise XnatException("Can't decode workflow query response.") - - try: - results = found_pipelines["ResultSet"]["Result"] - except KeyError: - return [] - - wf_ids = [item.get("workflow_id") for item in results] - - return wf_ids - - def get_resources(self, xnat_connection): - """ - Returns a list of all resource URIs from this session. 
- """ - resources = [] - resource_ids = list(self.resource_IDs.values()) - resource_ids.extend(self.misc_resource_IDs) - for r_id in resource_ids: - resource_list = xnat_connection.get_resource_list( - self.project, self.subject, self.name, r_id) - resources.extend([item["URI"] for item in resource_list]) - return resources - - def download(self, xnat, dest_folder, zip_name=None): - """ - Download a zip file containing all data for this session. Returns the - path to the new file if download is successful, raises an exception if - not - - Args: - xnat: An instance of datman.xnat.xnat() - dest_folder: The absolute path to the folder where the zip - should be deposited - zip_name: An optional name for the output zip file. If not - set the zip name will be session.name - - """ - resources_list = list(self.scan_resource_IDs) - resources_list.extend(self.misc_resource_IDs) - resources_list.extend(self.resource_IDs) - - if not resources_list: - raise ValueError(f"No scans or resources found for {self.name}") - - url = (f"{xnat.server}/REST/experiments/{self.id}/resources/" - f"{','.join(resources_list)}/files?structure=improved" - "&all=true&format=zip") - - if not zip_name: - zip_name = self.name.upper() + ".zip" - - output_path = os.path.join(dest_folder, zip_name) - if os.path.exists(output_path): - logger.error( - f"Cannot download {output_path}, file already exists.") - return output_path - - xnat._get_xnat_stream(url, output_path) - - return output_path - - def assign_scan_names(self, config, ident): - """Assign a datman style name to each scan in this experiment. - - This will populate the XnatScan.names and XnatScan.tags fields - for any scan that matches the study's export configuration. - - Args: - config (:obj:`datman.config.config`): A config object for the - study this experiment belongs to. - ident (:obj:`datman.scanid.Identifier`): A valid ID to apply - to this experiment's data. 
- """ - tags = config.get_tags(site=ident.site) - if not tags.series_map: - logger.error( - f"Failed to get tag export info for study {config.study_name}" - f" and site {ident.site}") - return - - for scan in self.scans: - try: - scan.set_datman_name(str(ident), tags) - except Exception as e: - logger.info( - f"Failed to make file name for series {scan.series} " - f"in session {str(ident)}. Reason {type(e).__name__}: " - f"{e}") - - def is_shared(self): - """Check if the experiment is shared from another project. - """ - alt_names = self.get_alt_labels() - if not alt_names: - return False - - return any([self.subject in label for label in alt_names]) - - def get_alt_labels(self): - """Find the names for all shared copies of the XNAT experiment. - """ - shared = self._get_contents("sharing/share") - if not shared: - return [] - return [item['data_fields']['label'] for item in shared[0]] - - def __str__(self): - return f"" - - def __repr__(self): - return self.__str__() - - -class XNATScan(XNATObject): - def __init__(self, experiment, scan_json): - self.project = experiment.project - self.subject = experiment.subject - self.experiment = experiment.name - self.shared = experiment.is_shared() - self.source_experiment = experiment.source_name - self.raw_json = scan_json - self.uid = self._get_field("UID") - self.series = self._get_field("ID") - self.image_type = self._get_field("parameters/imageType") - self.multiecho = self.is_multiecho() - self.description = self._set_description() - self.type = self._get_field("type") - self.names = [] - self.tags = [] - self.download_dir = None - - def _set_description(self): - series_descr = self._get_field("series_description") - if series_descr: - return series_descr - return self._get_field("type") - - def is_multiecho(self): - try: - child = self.raw_json["children"][0]["items"][0] - except (KeyError, IndexError): - return False - name = child["data_fields"].get("name") - if name and "MultiEcho" in name: - return True - return 
False - - def raw_dicoms_exist(self): - for child in self.raw_json["children"]: - for item in child["items"]: - file_type = item["data_fields"].get("content") - if file_type == "RAW": - return True - return False - - def is_derived(self): - if not self.image_type: - logger.warning( - f"Image type could not be found for series {self.series}. " - "Assuming it's not derived.") - return False - if "DERIVED" in self.image_type: - return True - return False - - def set_tag(self, tag_map): - matches = {} - for tag, pattern in tag_map.items(): - - if 'SeriesDescription' in pattern: - regex = pattern['SeriesDescription'] - search_target = self.description - elif 'XnatType' in pattern: - regex = pattern['XnatType'] - search_target = self.type - else: - raise KeyError( - "Missing keys 'SeriesDescription' or 'XnatType'" - " for Pattern!") - - if isinstance(regex, list): - regex = "|".join(regex) - if re.search(regex, search_target, re.IGNORECASE): - matches[tag] = pattern - - if len(matches) == 1 or (len(matches) == 2 and self.multiecho): - self.tags = list(matches.keys()) - return matches - return self._set_fmap_tag(tag_map, matches) - - def _set_fmap_tag(self, tag_map, matches): - try: - for tag, pattern in tag_map.items(): - if tag in matches: - if not re.search(pattern["ImageType"], self.image_type): - del matches[tag] - except Exception: - matches = {} - - if len(matches) > 2 or (len(matches) == 2 and not self.multiecho): - matches = {} - self.tags = list(matches.keys()) - return matches - - def set_datman_name(self, base_name, tags): - mangled_descr = self._mangle_descr() - padded_series = self.series.zfill(2) - tag_settings = self.set_tag(tags.series_map) - if not tag_settings: - raise ParseException( - f"Can't identify tag for series {self.series}") - names = [] - self.echo_dict = {} - for tag in tag_settings: - name = "_".join([base_name, tag, padded_series, mangled_descr]) - if self.multiecho: - echo_num = tag_settings[tag]["EchoNumber"] - if echo_num not in 
self.echo_dict: - self.echo_dict[echo_num] = name - names.append(name) - - if len(self.tags) > 1 and not self.multiecho: - logger.error(f"Multiple export patterns match for {base_name}, " - f"descr: {self.description}, tags: {self.tags}") - names = [] - self.tags = [] - - self.names = names - return names - - def _mangle_descr(self): - if not self.description: - return "" - return re.sub(r"[^a-zA-Z0-9.+]+", "-", self.description) - - def is_usable(self, strict=False): - if not self.raw_dicoms_exist(): - logger.debug(f"Ignoring {self.series} for {self.experiment}. " - f"No RAW dicoms exist.") - return False - - if not self.description: - logger.error(f"Can't find description for series {self.series} " - f"from session {self.experiment}.") - return False - - if not strict: - return True - - if self.is_derived(): - logger.debug( - f"Series {self.series} in session {self.experiment} is a " - "derived scan. Ignoring.") - return False - - if not self.names: - return False - - return True - - def download(self, xnat_conn, output_dir): - """Download all dicoms for this series. - - This will download all files in the series, and if successful, - set the download_dir attribute to the destination folder. - - Args: - xnat_conn (:obj:`datman.xnat.xnat`): An open xnat connection - to the server to download from. - output_dir (:obj:`str`): The full path to the location to - download all files to. - - Returns: - bool: True if the series was downloaded, False otherwise. - """ - logger.info(f"Downloading dicoms for {self.experiment} series: " - f"{self.series}.") - - if self.download_dir: - logger.debug( - "Data has been previously downloaded, skipping redownload.") - return True - - try: - dicom_zip = xnat_conn.get_dicom(self.project, self.subject, - self.experiment, self.series) - except Exception as e: - logger.error(f"Failed to download dicom archive for {self.subject}" - f" series {self.series}. 
Reason - {e}") - return False - - if os.path.getsize(dicom_zip) == 0: - logger.error( - f"Server returned an empty file for series {self.series} in " - f"session {self.experiment}. This may be a server error." - ) - os.remove(dicom_zip) - return False - - logger.info(f"Unpacking archive {dicom_zip}") - - try: - with ZipFile(dicom_zip, "r") as fh: - fh.extractall(output_dir) - except Exception as e: - logger.error("An error occurred unpacking dicom archive for " - f"{self.experiment}'s series {self.series}' - {e}") - os.remove(dicom_zip) - return False - else: - logger.info("Unpacking complete. Deleting archive file " - f"{dicom_zip}") - os.remove(dicom_zip) - - if self.shared: - self._fix_download_name(output_dir) - - dicom_file = self._find_first_dicom(output_dir) - - try: - self.download_dir = os.path.dirname(dicom_file) - except TypeError: - logger.warning("No valid dicom files found in XNAT session " - f"{self.subject} series {self.series}.") - return False - return True - - def _find_first_dicom(self, download_dir): - """Finds a dicom from the series (if any) in the given directory. - - Args: - download_dir (:obj:`str`): The directory to search for dicoms. - - Returns: - str: The full path to a dicom, or None if no readable dicoms - exist in the folder. - """ - search_dir = self._find_series_dir(download_dir) - for root_dir, folder, files in os.walk(search_dir): - for item in files: - path = os.path.join(root_dir, item) - if is_dicom(path): - return path - - def _find_series_dir(self, search_dir): - """Find the directory a series was downloaded to, if any. - - If multiple series are downloaded to the same temporary directory - this will search for the expected downloaded path of this scan. - - Args: - search_dir (:obj:`str`): The full path to a directory to search. - - Returns: - str: The full path to this scan's download location. 
- """ - expected_path = os.path.join(search_dir, self.experiment, "scans") - found = glob.glob(os.path.join(expected_path, f"{self.series}-*")) - if not found: - return search_dir - if not os.path.exists(found[0]): - return search_dir - return found[0] - - def _fix_download_name(self, output_dir): - """Rename a downloaded XNAT-shared scan to match the expected label. - """ - orig_dir = os.path.join(output_dir, self.source_experiment) - try: - os.rename(orig_dir, - orig_dir.replace( - self.source_experiment, - self.experiment)) - except OSError: - for root, dirs, _ in os.walk(orig_dir): - for item in dirs: - try: - os.rename(os.path.join(root, item), - os.path.join( - root.replace( - self.source_experiment, - self.experiment), - item) - ) - except OSError: - pass - else: - shutil.rmtree(orig_dir) - return - - def __str__(self): - return f"" - - def __repr__(self): - return self.__str__() From fd53d07ef6cb9cdfeb20da86a1f271f44da85ba8 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Tue, 4 Mar 2025 21:57:15 -0500 Subject: [PATCH 02/45] [ENH] Add missing methods to zip importers. Still need testing --- datman/importers.py | 281 ++++++++++++++++++++++++++++++-------------- 1 file changed, 192 insertions(+), 89 deletions(-) diff --git a/datman/importers.py b/datman/importers.py index f6d99628..88618a0b 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -76,6 +76,34 @@ def is_shared(self) -> bool: """ pass + def assign_scan_names(self, config, ident): + """Assign a datman style name to each scan in this experiment. + + This will populate the names and tags fields for any scan that + matches the study's export configuration. + + Args: + config (:obj:`datman.config.config`): A config object for the + study this experiment belongs to. + ident (:obj:`datman.scanid.Identifier`): A valid ID to apply + to this experiment's data. 
+ """ + tags = config.get_tags(site=ident.site) + if not tags.series_map: + logger.error( + f"Failed to get tag export info for study {config.study_name}" + f" and site {ident.site}") + return + + for scan in self.scans: + try: + scan.set_datman_name(str(ident), tags) + except Exception as e: + logger.info( + f"Failed to make file name for series {scan.series} " + f"in session {str(ident)}. Reason {type(e).__name__}: " + f"{e}") + class SeriesImporter(ABC): # XNATScan attributes and methods used by exporters... @@ -115,6 +143,7 @@ def subject(self) -> str: def description(self) -> str: """The series description (as from the dicom headers). """ + pass @property @abstractmethod @@ -123,6 +152,13 @@ def names(self) -> list[str]: """ pass + def _mangle_descr(self) -> str: + """Modify a series description to remove non-alphanumeric characters. + """ + if not self.description: + return "" + return re.sub(r"[^a-zA-Z0-9.+]+", "-", self.description) + ############################################################################### #### XNAT classes, formerly in xnat.py @@ -214,11 +250,11 @@ def source_name(self, value: str): self._source_name = value @property - def scans(self) -> list['XNATScan']: + def scans(self) -> list['SeriesImporter']: return self._scans @scans.setter - def scans(self, value: list['XNATScan']): + def scans(self, value: list['SeriesImporter']): self._scans = value @property @@ -460,34 +496,6 @@ def download(self, xnat, dest_folder, zip_name=None): return output_path - def assign_scan_names(self, config, ident): - """Assign a datman style name to each scan in this experiment. - - This will populate the XnatScan.names and XnatScan.tags fields - for any scan that matches the study's export configuration. - - Args: - config (:obj:`datman.config.config`): A config object for the - study this experiment belongs to. - ident (:obj:`datman.scanid.Identifier`): A valid ID to apply - to this experiment's data. 
- """ - tags = config.get_tags(site=ident.site) - if not tags.series_map: - logger.error( - f"Failed to get tag export info for study {config.study_name}" - f" and site {ident.site}") - return - - for scan in self.scans: - try: - scan.set_datman_name(str(ident), tags) - except Exception as e: - logger.info( - f"Failed to make file name for series {scan.series} " - f"in session {str(ident)}. Reason {type(e).__name__}: " - f"{e}") - def is_shared(self) -> bool: """Check if the experiment is shared from another project. """ @@ -663,11 +671,6 @@ def set_datman_name(self, base_name, tags): self.names = names return names - def _mangle_descr(self): - if not self.description: - return "" - return re.sub(r"[^a-zA-Z0-9.+]+", "-", self.description) - def is_usable(self, strict=False): if not self.raw_dicoms_exist(): logger.debug(f"Ignoring {self.series} for {self.experiment}. " @@ -836,31 +839,17 @@ def __repr__(self): class ZipImporter(SessionImporter): - def __init__(self, zip_path): + def __init__(self, ident, zip_path): + # Would be good to not need ident here... + self.ident = ident self.path = zip_path self.name = zip_path - # Does this need exception handling? Or allow calling class - # to do it? - headers = get_archive_headers(zip_path) - # Headers = dict[rel_path -> pydicom.dataset.FileDataset] - contents = {} - for path in headers: - dicom = headers[path] - # only need one date... but confirm all match? Or grab after - # constructing scan objects? - # Can also use AcquisitionDate, SeriesDate (?) 
- date = dicom.get('StudyDate') - series_description = dicom.get('SeriesDescription') - series = dicom.get('SeriesNumber') - contents[path] = { - 'date': date, - 'description': series_description, - 'series': series - } - # Still need to construct the ZipSeriesImporter class - # and also a way of assigning names like - # experiment.assign_scan_names(config, ident) so truly interchangeable + self.contents = self.parse_contents() + self.scans = self.get_scans() + self.resources = self.contents['resources'] + + self.date = self.scans[0].datess # Use properties here to conform with SessionImporter interface # and guarantee at creation that expected attributes exist @@ -883,20 +872,20 @@ def source_name(self, value: str): self.name = value @property - def scans(self) -> list['XNATScan']: + def scans(self) -> list['SeriesImporter']: return self._scans @scans.setter - def scans(self, value: list['XNATScan']): + def scans(self, value: list['SeriesImporter']): self._scans = value - # @property - # def date(self) -> str: - # return self._date + @property + def date(self) -> str: + return self._date - # @date.setter - # def date(self, value: str): - # self._date = value + @date.setter + def date(self, value: str): + self._date = value def is_shared(self) -> bool: # Can't track shared sessions with zip files. @@ -907,35 +896,149 @@ def extract(self, dest_path: str) -> str: Args: dest_path (str): The full path to the location to extract into. - - Returns: - list, list: A list of paths to each series' folder and a list - of paths to non-scan files bundled with the session. 
""" - ##### May want to update this later to only extract series as needed - ##### but to grab all the folders and file info from the zip file - ##### before extract (I think we can read dicom headers in utils already) - + for item in self.scans: + item.extract(dest_path) + self.extract_resources(dest_path) + def extract_resources(self, dest_path: str): with ZipFile(self.path, "r") as fh: - # Scan zips contain parent folder that holds all scan data. - # Grab it before extracting contents. - par_info = fh.filelist[0] - if par_info.is_dir(): - scan_dir = os.path.join(dest_path, par_info.filename) - else: - raise InputException("Malformed scan zip folder.") - fh.extractall(dest_path) + for item in self.resources: + fh.extract(item, path=dest_path) + + def parse_contents(self): + contents = { + 'scans': {}, + 'resources': [] + } + with ZipFile(self.path, "r") as fh: + par_dir = fh.filelist[0].filename.strip('/') + for item in fh.filelist[1:]: + if item.is_dir(): + contents['scans'].setdefault(item.filename.strip('/'), []) + else: + folder, _ = os.path.split(item.filename) + if folder == par_dir: + contents['resources'].append(item.filename) + else: + contents['scans'].setdefault(folder, []).append( + item.filename) + return contents + def get_scans(self): + # Headers = dict[rel_path -> pydicom.dataset.FileDataset] + headers = get_archive_headers(self.path) scans = [] - resources = [] - for item in glob.glob(os.path.join(scan_dir, "*")): - if os.path.isdir(item): - scans.append(item) - else: - resources.append(item) + for sub_path in headers: + # .get_full_subjectid may need to be changed for compatibility + scans.append( + ZipSeriesImporter( + self.ident.get_full_subjectid(), self.path, sub_path, + headers[sub_path], self.contents['scans'][sub_path] + ) + ) + return scans + + def __str__(self): + return f" str: + return self._series + @series.setter + def series(self, value: str): + self._series = value + + @property + def subject(self) -> str: + return self._subject + 
+ @subject.setter + def subject(self, value: str): + self._subject = value + + @property + def description(self) -> str: + return self._description + + @description.setter + def description(self, value: str): + self._description = value -def ZipSeriesImporter(SeriesImporter): + @property + def names(self) -> list[str]: + return self._names + + @names.setter + def names(self, value: list[str]): + self._names = value + + def extract(self, output_dir: str): + with ZipFile(self.zip_file, "r") as fh: + for item in self.contents: + fh.extract(item, path=output_dir) + self.download_dir = os.path.join(output_dir, self.dcm_dir) + + def set_datman_name(self, base_name: str, tags: 'datman.config.TagInfo' + ) -> list[str]: + mangled_descr = self._mangle_descr() + tag_settings = self.set_tag(tags.series_map) + if not tag_settings: + raise ParseException( + f"Can't identify tag for series {self.series}") + + names = [] + for tag in tag_settings: + names.append( + "_".join([base_name, tag, self.series.zfill(2), mangled_descr]) + ) + + self.names = names + return names + + def set_tag(self, tag_map): + matches = {} + for tag, pattern in tag_map.items(): + if 'SeriesDescription' not in pattern: + raise KeyError( + "Missing key 'SeriesDescription' for 'Pattern'!") + + regex = pattern['SeriesDescription'] + if isinstance(regex, list): + regex = "|".join(regex) + + if re.search(regex, self.description, re.IGNORECASE): + matches[tag] = pattern + + if (len(matches) == 1 or + all(['EchoNumber' in matches[tag] for tag in matches])): + self.tags = list(matches.keys()) + return matches + + def __str__(self): + return f"" + + def __repr__(self): + return self.__str__() \ No newline at end of file From f6ce3abf96aeb5136d82fd95f5d900114a61dde9 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Mon, 10 Mar 2025 22:45:40 -0400 Subject: [PATCH 03/45] [ENH] Update extract to allow zip files to be used as input --- bin/dm_xnat_extract.py | 125 ++++++++++++++++++++++++++++---------- datman/exporters.py 
| 4 +- datman/importers.py | 133 +++++++++++++++++++++++++++++++---------- 3 files changed, 197 insertions(+), 65 deletions(-) diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py index 2268c67b..fdb10e40 100755 --- a/bin/dm_xnat_extract.py +++ b/bin/dm_xnat_extract.py @@ -44,15 +44,18 @@ """ from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter +import glob import logging import os import platform import shutil import sys +from zipfile import BadZipFile import datman.config import datman.exceptions import datman.exporters +import datman.importers import datman.scan import datman.scanid import datman.xnat @@ -137,30 +140,20 @@ def main(): else: bids_opts = None - auth = datman.xnat.get_auth(args.username) if args.username else None + sessions = get_sessions(config, args) - if args.experiment: - experiments = collect_experiment( - config, args.experiment, args.study, auth=auth, url=args.server) - else: - experiments = collect_all_experiments( - config, auth=auth, url=args.server) + logger.info(f"Found {len(session)} sessions for study {args.study}") - logger.info(f"Found {len(experiments)} experiments for study {args.study}") + for xnat, importer in sessions: + session = datman.scan.Scan(importer._ident, config, + bids_root=args.bids_out) - for xnat, project, ident in experiments: - xnat_experiment = get_xnat_experiment(xnat, project, ident) - if not xnat_experiment: - continue - - session = datman.scan.Scan(ident, config, bids_root=args.bids_out) - - if xnat_experiment.resource_files: - export_resources(session.resource_path, xnat, xnat_experiment, + if importer.resource_files: + export_resources(session.resource_path, xnat, importer, dry_run=args.dry_run) - if xnat_experiment.scans: - export_scans(config, xnat, xnat_experiment, session, + if importer.scans: + export_scans(config, xnat, importer, session, bids_opts=bids_opts, dry_run=args.dry_run, ignore_db=args.dont_update_dashboard, wanted_tags=args.tag) @@ -186,7 +179,7 @@ def _is_file(path, 
parser): ) g_main = parser.add_argument_group( - "Options for choosing data from XNAT to extract" + "Options for choosing data to extract" ) g_main.add_argument( "study", @@ -234,6 +227,12 @@ def _is_file(path, parser): "--use-dcm2bids", action="store_true", default=False, help="Pull xnat data and convert to bids using dcm2bids" ) + g_main.add_argument( + "--use-zips", action="store", metavar="ZIP_DIR", + nargs="?", default="USE_XNAT", + help="A directory of zip files to use instead of pulling from XNAT. " + "If not provided the study's 'dicom' dir will be used instead." + ) g_dcm2bids = parser.add_argument_group( "Options for using dcm2bids" @@ -335,6 +334,55 @@ def configure_logging(study, log_level): logging.getLogger('datman.exporters').addHandler(ch) +def get_sessions(config, args): + if args.use_zips != "USE_XNAT": + return collect_zips(config, args) + + auth = datman.xnat.get_auth(args.username) if args.username else None + + if args.experiment: + return collect_experiment( + config, args.experiment, args.study, auth=auth, url=args.server) + + return collect_all_experiments(config, auth=auth, url=args.server) + + +def collect_zips(config, args): + if args.use_zips is None: + zip_folder = config.get_path("dicom") + else: + zip_folder = args.use_zips + + if not os.path.exists(zip_folder): + logger.error(f"Zip file directory not found: {zip_folder}") + return [] + + if args.experiment: + ident = get_identifier(config, args.experiment) + if not ident: + logger.error(f"Invalid session ID {args.experiment}.") + return [] + + zip_path = os.path.join(zip_folder, str(ident) + ".zip") + if not os.path.exists(zip_path): + logger.error(f"Zip file not found: {zip_path}") + return + + return [None, datman.importers.ZipImporter(ident, zip_path)] + + zip_files = [] + for zip_path in glob.glob(os.path.join(zip_folder, "*.zip")): + sess_name = os.path.basename(zip_path).replace(".zip", "") + ident = get_identifier(config, sess_name) + if not ident: + logger.error( + 
f"Ignoring invalid zip file name in dicom dir: {sess_name}") + continue + zip_files.append([None, datman.importers.ZipImporter(ident, zip_path)]) + + return zip_files + + def collect_experiment(config, experiment_id, study, url=None, auth=None): ident = get_identifier(config, experiment_id) xnat = datman.xnat.get_connection( @@ -349,6 +397,10 @@ def collect_experiment(config, experiment_id, study, url=None, auth=None): f"Ensure it matches an existing experiment ID.") return [] + experiment = get_xnat_experiment(xnat, xnat_project, ident) + if not experiment: + return [] + return [(xnat, xnat_project, ident)] @@ -445,9 +497,9 @@ def get_xnat_experiment(xnat, project, ident): return xnat_experiment -def export_resources(resource_dir, xnat, xnat_experiment, dry_run=False): - logger.info(f"Extracting {len(xnat_experiment.resource_files)} resources " - f"from {xnat_experiment.name}") +def export_resources(resource_dir, xnat, importer, dry_run=False): + logger.info(f"Extracting {len(importer.resource_files)} resources " + f"from {importer.name}") if not os.path.isdir(resource_dir): logger.info(f"Creating resources dir {resource_dir}") @@ -457,6 +509,12 @@ def export_resources(resource_dir, xnat, xnat_experiment, dry_run=False): logger.error(f"Failed creating resources dir {resource_dir}") return + if isinstance(importer, datman.importers.ZipImporter): + importer.get_resources(resource_dir) + return + + xnat_experiment = importer + for label in xnat_experiment.resource_IDs: if label == "No Label": target_path = os.path.join(resource_dir, "MISC") @@ -549,7 +607,7 @@ def download_resource(xnat, xnat_experiment, xnat_resource_id, return target_path -def export_scans(config, xnat, xnat_experiment, session, bids_opts=None, +def export_scans(config, xnat, importer, session, bids_opts=None, wanted_tags=None, ignore_db=False, dry_run=False): """Export all XNAT data for a session to desired formats. 
@@ -558,8 +616,9 @@ def export_scans(config, xnat, xnat_experiment, session, bids_opts=None, the study the experiment belongs to. xnat (:obj:`datman.xnat.xnat`): An XNAT connection for the server the experiment resides on. - xnat_experiment (:obj:`datman.xnat.XNATExperiment`): The experiment - to download, extract and export. + importer (:obj:`datman.importer.SessionImporter`): An instance of + a SessionImporter that holds all information needed to get + scans data. session (:obj:`datman.scan.Scan`): The datman session this experiment belongs to. bids_opts (:obj:`BidsOptions`, optional): dcm2bids settings to be @@ -574,28 +633,28 @@ def export_scans(config, xnat, xnat_experiment, session, bids_opts=None, """ logger.info(f"Processing scans in experiment {xnat_experiment.name}") - xnat_experiment.assign_scan_names(config, session._ident) + importer.assign_scan_names(config, session._ident) session_exporters = make_session_exporters( - config, session, xnat_experiment, bids_opts=bids_opts, + config, session, importer, bids_opts=bids_opts, ignore_db=ignore_db, dry_run=dry_run) series_exporters = make_all_series_exporters( - config, session, xnat_experiment, bids_opts=bids_opts, + config, session, importer, bids_opts=bids_opts, wanted_tags=wanted_tags, dry_run=dry_run ) if not needs_export(session_exporters) and not series_exporters: - logger.debug(f"Session {xnat_experiment} already extracted. Skipping.") + logger.debug(f"Session {importer} already extracted. 
Skipping.") return with make_temp_directory(prefix="dm_xnat_extract_") as temp_dir: - for scan in xnat_experiment.scans: + for scan in importer.scans: if needs_download(scan, session_exporters, series_exporters): - scan.download(xnat, temp_dir) + scan.get_files(temp_dir, xnat) for exporter in series_exporters.get(scan, []): - exporter.export(scan.download_dir) + exporter.export(scan.dcm_dir) for exporter in session_exporters: try: diff --git a/datman/exporters.py b/datman/exporters.py index a78efee3..1bd77311 100644 --- a/datman/exporters.py +++ b/datman/exporters.py @@ -183,7 +183,7 @@ class BidsExporter(SessionExporter): type = "bids" def __init__(self, config, session, experiment, bids_opts=None, **kwargs): - self.exp_label = experiment.name + self.dcm_dir = experiment.dcm_dir self.bids_sub = session._ident.get_bids_name() self.bids_ses = session._ident.timepoint self.repeat = session._ident.session @@ -379,7 +379,7 @@ def _get_scan_dir(self, download_dir): f"sub-{self.bids_sub}_ses-{self.bids_ses}" ) return tmp_dir - return os.path.join(download_dir, self.exp_label, "scans") + return os.path.join(download_dir, self.dcm_dir) def outputs_exist(self): if self.refresh: diff --git a/datman/importers.py b/datman/importers.py index 88618a0b..9716773c 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -5,6 +5,7 @@ create the files and database contents users may actually interact with. """ from abc import ABC, abstractmethod +from datetime import datetime import glob import json import logging @@ -70,6 +71,13 @@ def scans(self) -> list['SeriesImporter']: """ pass + @property + @abstractmethod + def dcm_dir(self) -> str: + """The subfolder that will hold the session's dicom dirs. + """ + pass + @abstractmethod def is_shared(self) -> bool: """Indicates whether the session is shared with other projects. 
@@ -116,6 +124,12 @@ class SeriesImporter(ABC): # scan.download_dir # xnat copy for example points to: /scratch/dawn/temp_stuff/export_zip/xnat_copy/SPN10_CMH_0083_01_SE01_MR/scans/6-t1_mprage_T1_900/resources/DICOM/files # unzipped copy would be (diff session): 20190116_Ex09352_ASND1MR_ASQB002/Ex09352_Se00003_SagT1Bravo-1mm-32ch/ + @property + @abstractmethod + def dcm_dir(self) -> str: + """Full path to the folder that holds a local copy of the dicom files. + """ + pass @property @abstractmethod @@ -152,6 +166,11 @@ def names(self) -> list[str]: """ pass + @abstractmethod + def set_datman_name(self, ident: str, tags: 'datman.config.TagInfo' + ) -> list[str]: + pass + def _mangle_descr(self) -> str: """Modify a series description to remove non-alphanumeric characters. """ @@ -219,6 +238,9 @@ def __init__(self, project, subject_name, experiment_json): self.name = self._get_field("label") self.source_name = self.name + # The subdirectory to find the dicoms in after download + self.dcm_dir = os.path.join(self.name, "scans") + # Scan attributes self.scans = self._get_scans() self.scan_UIDs = self._get_scan_UIDs() @@ -265,6 +287,14 @@ def date(self) -> str: def date(self, value: str): self._date = value + @property + def dcm_dir(self) -> str: + return self._dcm_dir + + @dcm_dir.setter + def dcm_dir(self, value: str): + self._dcm_dir = value + def _get_contents(self, data_type): children = self.raw_json.get("children", []) @@ -458,16 +488,16 @@ def get_resources(self, xnat_connection): resources.extend([item["URI"] for item in resource_list]) return resources - def download(self, xnat, dest_folder, zip_name=None): + def get_files(self, dest_folder, xnat, zip_name=None): """ Download a zip file containing all data for this session. 
Returns the path to the new file if download is successful, raises an exception if not Args: - xnat: An instance of datman.xnat.xnat() dest_folder: The absolute path to the folder where the zip should be deposited + xnat: An instance of datman.xnat.xnat() zip_name: An optional name for the output zip file. If not set the zip name will be session.name @@ -536,10 +566,18 @@ def __init__(self, experiment, scan_json): self.type = self._get_field("type") self.names = [] self.tags = [] - self.download_dir = None + self.dcm_dir = None # Use properties here to conform with SeriesImporter interface # and guarantee at creation that expected attributes exist + @property + def dcm_dir(self) ->str: + return self._dcm_dir + + @dcm_dir.property + def dcm_dir(self, value: str): + self.dcm_dir = value + @property def series(self) -> str: return self._series @@ -696,17 +734,17 @@ def is_usable(self, strict=False): return True - def download(self, xnat_conn, output_dir): + def get_files(self, output_dir, xnat_conn): """Download all dicoms for this series. This will download all files in the series, and if successful, - set the download_dir attribute to the destination folder. + set the dcm_dir attribute to the destination folder. Args: - xnat_conn (:obj:`datman.xnat.xnat`): An open xnat connection - to the server to download from. output_dir (:obj:`str`): The full path to the location to download all files to. + xnat_conn (:obj:`datman.xnat.xnat`): An open xnat connection + to the server to download from. Returns: bool: True if the series was downloaded, False otherwise. 
@@ -714,7 +752,7 @@ def download(self, xnat_conn, output_dir): logger.info(f"Downloading dicoms for {self.experiment} series: " f"{self.series}.") - if self.download_dir: + if self.dcm_dir: logger.debug( "Data has been previously downloaded, skipping redownload.") return True @@ -756,24 +794,24 @@ def download(self, xnat_conn, output_dir): dicom_file = self._find_first_dicom(output_dir) try: - self.download_dir = os.path.dirname(dicom_file) + self.dcm_dir = os.path.dirname(dicom_file) except TypeError: logger.warning("No valid dicom files found in XNAT session " f"{self.subject} series {self.series}.") return False return True - def _find_first_dicom(self, download_dir): + def _find_first_dicom(self, dcm_dir): """Finds a dicom from the series (if any) in the given directory. Args: - download_dir (:obj:`str`): The directory to search for dicoms. + dcm_dir (:obj:`str`): The directory to search for dicoms. Returns: str: The full path to a dicom, or None if no readable dicoms exist in the folder. """ - search_dir = self._find_series_dir(download_dir) + search_dir = self._find_series_dir(dcm_dir) for root_dir, folder, files in os.walk(search_dir): for item in files: path = os.path.join(root_dir, item) @@ -841,15 +879,19 @@ class ZipImporter(SessionImporter): def __init__(self, ident, zip_path): # Would be good to not need ident here... 
- self.ident = ident + self._ident = ident self.path = zip_path self.name = zip_path - self.contents = self.parse_contents() self.scans = self.get_scans() self.resources = self.contents['resources'] - - self.date = self.scans[0].datess + self.dcm_dir = os.path.split(self.scans[0].dcm_dir)[0] + try: + # Convert date to same format XNAT gives + self.date = str(datetime.strptime(self.scans[0].date, "%Y%m%d")) + except ValueError: + logger.error("Unexpected date format in dicom header.") + self.date = self.scans[0].date # Use properties here to conform with SessionImporter interface # and guarantee at creation that expected attributes exist @@ -887,21 +929,29 @@ def date(self) -> str: def date(self, value: str): self._date = value + @property + def dcm_dir(self) -> str: + return self._dcm_dir + + @dcm_dir.setter + def dcm_dir(self, value: str): + self._dcm_dir = value + def is_shared(self) -> bool: # Can't track shared sessions with zip files. return False - def extract(self, dest_path: str) -> str: + def get_files(self, dest_path: str, *args) -> str: """Unpack the zip file at the given location. Args: dest_path (str): The full path to the location to extract into. 
""" for item in self.scans: - item.extract(dest_path) + item.get_files(dest_path) self.extract_resources(dest_path) - def extract_resources(self, dest_path: str): + def get_resources(self, dest_path: str): with ZipFile(self.path, "r") as fh: for item in self.resources: fh.extract(item, path=dest_path) @@ -928,16 +978,39 @@ def parse_contents(self): def get_scans(self): # Headers = dict[rel_path -> pydicom.dataset.FileDataset] headers = get_archive_headers(self.path) - scans = [] + # scans = [] + # for sub_path in headers: + # # .get_full_subjectid may need to be changed for compatibility + # scans.append( + # ZipSeriesImporter( + # self.ident.get_full_subjectid(), self.path, sub_path, + # headers[sub_path], self.contents['scans'][sub_path] + # ) + # ) + # return scans + scans = {} + duplicate_series = set() for sub_path in headers: # .get_full_subjectid may need to be changed for compatibility - scans.append( - ZipSeriesImporter( - self.ident.get_full_subjectid(), self.path, sub_path, + zip_scan = ZipSeriesImporter( + self._ident.get_full_subjectid(), self.path, sub_path, headers[sub_path], self.contents['scans'][sub_path] - ) ) - return scans + if zip_scan.series in scans: + duplicate_series.add(zip_scan.series) + else: + scans[zip_scan.series] = zip_scan + + # Omit scans when more than one has the same series num (can't handle + # these...) + if duplicate_series: + logger.error("Duplicate series present in zip file. 
" + f"Ignoring: {duplicate_series}") + + for series in duplicate_series: + del scans[series] + + return list(scans.values()) def __str__(self): return f" list[str]: def names(self, value: list[str]): self._names = value - def extract(self, output_dir: str): + def get_files(self, output_dir: str, *args): with ZipFile(self.zip_file, "r") as fh: for item in self.contents: fh.extract(item, path=output_dir) - self.download_dir = os.path.join(output_dir, self.dcm_dir) + self.dcm_dir = os.path.join(output_dir, self.series_dir) def set_datman_name(self, base_name: str, tags: 'datman.config.TagInfo' ) -> list[str]: @@ -1041,4 +1114,4 @@ def __str__(self): return f"" def __repr__(self): - return self.__str__() \ No newline at end of file + return self.__str__() From b5253e7eda4f60c4504390a7b19f2f2df8189e43 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Wed, 12 Mar 2025 13:16:42 -0400 Subject: [PATCH 04/45] [FIX] Bug fixes (name issues, repeated resource exports) --- bin/dm_xnat_extract.py | 16 ++++++++++------ datman/exporters.py | 1 + datman/importers.py | 27 ++++++++++++++++++++++----- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py index fdb10e40..ee2a9880 100755 --- a/bin/dm_xnat_extract.py +++ b/bin/dm_xnat_extract.py @@ -142,7 +142,7 @@ def main(): sessions = get_sessions(config, args) - logger.info(f"Found {len(session)} sessions for study {args.study}") + logger.info(f"Found {len(sessions)} sessions for study {args.study}") for xnat, importer in sessions: session = datman.scan.Scan(importer._ident, config, @@ -366,9 +366,9 @@ def collect_zips(config, args): zip_path = os.path.join(zip_folder, str(ident) + ".zip") if not os.path.exists(zip_path): logger.error(f"Zip file not found: {zip_path}") - return + return [] - return [None, datman.importers.ZipImporter(ident, zip_path)] + return [(None, datman.importers.ZipImporter(ident, zip_path))] zip_files = [] for zip_path in glob.glob(os.path.join(zip_folder, 
"*.zip")): @@ -378,7 +378,9 @@ def collect_zips(config, args): logger.error( f"Ignoring invalid zip file name in dicom dir: {sess_name}") continue - zip_files.append([None, datman.importers.ZipImporter(ident, zip_path)]) + zip_files.append( + (None, datman.importers.ZipImporter(ident, zip_path)) + ) return zip_files @@ -510,7 +512,9 @@ def export_resources(resource_dir, xnat, importer, dry_run=False): return if isinstance(importer, datman.importers.ZipImporter): - importer.get_resources(resource_dir) + for item in importer.resource_files: + if not os.path.exists(item): + importer.get_resources(resource_dir, item) return xnat_experiment = importer @@ -631,7 +635,7 @@ def export_scans(config, xnat, importer, session, bids_opts=None, dry_run (bool, optional): If True, no outputs will be made. Defaults to False. """ - logger.info(f"Processing scans in experiment {xnat_experiment.name}") + logger.info(f"Processing scans in experiment {importer.name}") importer.assign_scan_names(config, session._ident) diff --git a/datman/exporters.py b/datman/exporters.py index 1bd77311..339d8b3d 100644 --- a/datman/exporters.py +++ b/datman/exporters.py @@ -31,6 +31,7 @@ filter_niftis, find_tech_notes, read_blacklist, get_relative_source, read_json, write_json) + try: from dcm2bids import dcm2bids, Dcm2bids from dcm2bids.sidecar import Acquisition diff --git a/datman/importers.py b/datman/importers.py index 9716773c..68256776 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -574,9 +574,9 @@ def __init__(self, experiment, scan_json): def dcm_dir(self) ->str: return self._dcm_dir - @dcm_dir.property + @dcm_dir.setter def dcm_dir(self, value: str): - self.dcm_dir = value + self._dcm_dir = value @property def series(self) -> str: @@ -885,10 +885,12 @@ def __init__(self, ident, zip_path): self.contents = self.parse_contents() self.scans = self.get_scans() self.resources = self.contents['resources'] - self.dcm_dir = os.path.split(self.scans[0].dcm_dir)[0] + # For compatibility 
(fix later) + self.resource_files = self.resources + self.dcm_dir = os.path.split(self.scans[0].series_dir)[0] try: # Convert date to same format XNAT gives - self.date = str(datetime.strptime(self.scans[0].date, "%Y%m%d")) + self.date = str(datetime.strptime(self.scans[0].date, "%Y%m%d").date()) except ValueError: logger.error("Unexpected date format in dicom header.") self.date = self.scans[0].date @@ -951,8 +953,11 @@ def get_files(self, dest_path: str, *args) -> str: item.get_files(dest_path) self.extract_resources(dest_path) - def get_resources(self, dest_path: str): + def get_resources(self, dest_path: str, fname: str = None): with ZipFile(self.path, "r") as fh: + if fname: + fh.extract(fname, path=dest_path) + return for item in self.resources: fh.extract(item, path=dest_path) @@ -1033,9 +1038,18 @@ def __init__(self, subject, zip_file, series_dir, header, zip_items): self.uid = str(header.get('StudyInstanceUID')) self.image_type = "////".join(header.get("ImageType")) self.names = [] + self.dcm_dir = None # Use properties here to conform with SeriesImporter interface # and guarantee at creation that expected attributes exist + @property + def dcm_dir(self) -> str: + return self._dcm_dir + + @dcm_dir.setter + def dcm_dir(self, value): + self._dcm_dir = value + @property def series(self) -> str: return self._series @@ -1068,6 +1082,9 @@ def names(self) -> list[str]: def names(self, value: list[str]): self._names = value + def is_usable(self): + return any([item.endswith(".dcm") for item in self.contents]) + def get_files(self, output_dir: str, *args): with ZipFile(self.zip_file, "r") as fh: for item in self.contents: From ab49032f274274274aa3bb6b5dbe93f9fd5e2a25 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Wed, 12 Mar 2025 19:42:20 -0400 Subject: [PATCH 05/45] [FIX] Handle headers with an empty image_type field --- datman/importers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datman/importers.py b/datman/importers.py index 
68256776..ffe44afc 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -1036,7 +1036,10 @@ def __init__(self, subject, zip_file, series_dir, header, zip_items): self.series = str(header.get('SeriesNumber')) self.description = str(header.get('SeriesDescription')) self.uid = str(header.get('StudyInstanceUID')) - self.image_type = "////".join(header.get("ImageType")) + try: + self.image_type = "////".join(header.get("ImageType")) + except TypeError: + self.image_type = "" self.names = [] self.dcm_dir = None From 390e8ff98e02a80968daf40f820257bf9c44a780 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Thu, 13 Mar 2025 16:05:07 -0400 Subject: [PATCH 06/45] [FIX] Allow an ident to be stored with the XNATExperiment importer --- bin/dm_xnat_extract.py | 12 ++++++++---- datman/importers.py | 4 +++- datman/xnat.py | 27 ++++++++++++++++++++++----- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py index ee2a9880..4d77d10d 100755 --- a/bin/dm_xnat_extract.py +++ b/bin/dm_xnat_extract.py @@ -403,7 +403,7 @@ def collect_experiment(config, experiment_id, study, url=None, auth=None): if not experiment: return [] - return [(xnat, xnat_project, ident)] + return [(xnat, experiment)] def get_identifier(config, subid): @@ -436,8 +436,11 @@ def collect_all_experiments(config, auth=None, url=None): for exper_id in xnat.get_experiment_ids(project): ident = get_experiment_identifier(config, project, exper_id) - if ident: - experiments.append((xnat, project, ident)) + if not ident: + continue + experiment = get_xnat_experiment(xnat, project, ident) + if experiment: + experiments.append((xnat, experiment)) return experiments @@ -491,7 +494,8 @@ def get_xnat_experiment(xnat, project, ident): try: xnat_experiment = xnat.get_experiment( - project, ident.get_xnat_subject_id(), experiment_label) + project, ident.get_xnat_subject_id(), experiment_label, + ident=ident) except Exception as e: logger.error(f"Unable to retrieve 
experiment {experiment_label} from " f"XNAT server. {type(e).__name__}: {e}") diff --git a/datman/importers.py b/datman/importers.py index ffe44afc..27943dc3 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -222,13 +222,15 @@ def __repr__(self): class XNATExperiment(SessionImporter, XNATObject): - def __init__(self, project, subject_name, experiment_json): + def __init__(self, project, subject_name, experiment_json, + ident=None): self.raw_json = experiment_json self.project = project self.subject = subject_name self.uid = self._get_field("UID") self.id = self._get_field("ID") self.date = self._get_field("date") + self._ident = ident if self.is_shared(): self.name = [label for label in self.get_alt_labels() diff --git a/datman/xnat.py b/datman/xnat.py index e2bbbca9..3cfdc7e1 100644 --- a/datman/xnat.py +++ b/datman/xnat.py @@ -10,7 +10,7 @@ import requests -from datman.exceptions import UndefinedSetting, XnatException +from datman.exceptions import UndefinedSetting, XnatException, InputException from datman.importers import XNATSubject, XNATExperiment, XNATScan @@ -439,24 +439,41 @@ def get_experiment_ids(self, project, subject=""): return [item.get("label") for item in result["ResultSet"]["Result"]] - def get_experiment(self, project, subject_id, exper_id, create=False): + def get_experiment(self, project, subject_id=None, exper_id=None, + create=False, ident=None): """Get an experiment from the XNAT server. Args: project (:obj:`str`): The XNAT project to search within. - subject_id (:obj:`str`): The XNAT subject to search. - exper_id (:obj:`str`): The name of the experiment to retrieve. + subject_id (:obj:`str`, optional): The XNAT subject to search. + Either subject_id and exper_id must both be provided or + ident must be given. + exper_id (:obj:`str`, optional): The name of the experiment + to retrieve. Either subject_id and exper_id must both be + provided or ident must be given. 
create (bool, optional): Whether to create an experiment matching exper_id if a match is not found. Defaults to False. + ident (:obj:`datman.scanid.Identifier`, optional): a datman + identifier. Must be provided if subject_id and exper_id are + not given. Raises: XnatException: If the experiment doesn't exist and can't be made or the server/API can't be accessed. + InputException: If not given both subject_id and exper_id OR + ident as arguments. Returns: :obj:`datman.xnat.XNATExperiment`: An XNATExperiment instance matching the given experiment ID. """ + if not (subject_id and exper_id): + if not ident: + raise InputException( + "Must be given either 1) subject ID and " + "experiment ID or 2) A datman.scanid.Identifier") + subject_id = ident.get_xnat_subject_id() + exper_id = ident.get_xnat_experiment_id() logger.debug( f"Querying XNAT server {self.server} for experiment {exper_id} " f"belonging to {subject_id} in project {project}") @@ -486,7 +503,7 @@ def get_experiment(self, project, subject_id, exper_id, create=False): raise XnatException( f"Could not access metadata for experiment {exper_id}") - return XNATExperiment(project, subject_id, exper_json) + return XNATExperiment(project, subject_id, exper_json, ident=ident) def make_experiment(self, project, subject, experiment): """Make a new (empty) experiment on the XNAT server. From 4240cfed7b9d0f48918677e55db7882f35dc91c2 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Thu, 13 Mar 2025 18:26:43 -0400 Subject: [PATCH 07/45] [FIX] Renamed SessionImporter dcm_dir -> dcm_subdir SessionImporter and SeriesImporter both used dcm_dir but for different functions (and with SeriesImporter sometimes being None). Renamed SessionImporter variable to better reflect differences. 
--- datman/exporters.py | 2 +- datman/importers.py | 35 ++++++++++++++++++++++------------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/datman/exporters.py b/datman/exporters.py index 339d8b3d..b0ed0cd5 100644 --- a/datman/exporters.py +++ b/datman/exporters.py @@ -184,7 +184,7 @@ class BidsExporter(SessionExporter): type = "bids" def __init__(self, config, session, experiment, bids_opts=None, **kwargs): - self.dcm_dir = experiment.dcm_dir + self.dcm_dir = experiment.dcm_subdir self.bids_sub = session._ident.get_bids_name() self.bids_ses = session._ident.timepoint self.repeat = session._ident.session diff --git a/datman/importers.py b/datman/importers.py index 27943dc3..4650025a 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -73,8 +73,10 @@ def scans(self) -> list['SeriesImporter']: @property @abstractmethod - def dcm_dir(self) -> str: + def dcm_subdir(self) -> str: """The subfolder that will hold the session's dicom dirs. + + This will be a relative path, and will always be defined. """ pass @@ -128,6 +130,9 @@ class SeriesImporter(ABC): @abstractmethod def dcm_dir(self) -> str: """Full path to the folder that holds a local copy of the dicom files. + + This should be None if the dicoms have not been retrieved from their + source location (e.g. with get_files). 
""" pass @@ -166,6 +171,10 @@ def names(self) -> list[str]: """ pass + @abstractmethod + def is_usable(self) -> bool: + pass + @abstractmethod def set_datman_name(self, ident: str, tags: 'datman.config.TagInfo' ) -> list[str]: @@ -241,7 +250,7 @@ def __init__(self, project, subject_name, experiment_json, self.source_name = self.name # The subdirectory to find the dicoms in after download - self.dcm_dir = os.path.join(self.name, "scans") + self.dcm_subdir = os.path.join(self.name, "scans") # Scan attributes self.scans = self._get_scans() @@ -290,12 +299,12 @@ def date(self, value: str): self._date = value @property - def dcm_dir(self) -> str: - return self._dcm_dir + def dcm_subdir(self) -> str: + return self._dcm_subdir - @dcm_dir.setter - def dcm_dir(self, value: str): - self._dcm_dir = value + @dcm_subdir.setter + def dcm_subdir(self, value: str): + self._dcm_subdir = value def _get_contents(self, data_type): children = self.raw_json.get("children", []) @@ -889,7 +898,7 @@ def __init__(self, ident, zip_path): self.resources = self.contents['resources'] # For compatibility (fix later) self.resource_files = self.resources - self.dcm_dir = os.path.split(self.scans[0].series_dir)[0] + self.dcm_subdir = os.path.split(self.scans[0].series_dir)[0] try: # Convert date to same format XNAT gives self.date = str(datetime.strptime(self.scans[0].date, "%Y%m%d").date()) @@ -934,12 +943,12 @@ def date(self, value: str): self._date = value @property - def dcm_dir(self) -> str: - return self._dcm_dir + def dcm_subdir(self) -> str: + return self._dcm_subdir - @dcm_dir.setter - def dcm_dir(self, value: str): - self._dcm_dir = value + @dcm_subdir.setter + def dcm_subdir(self, value: str): + self._dcm_subdir = value def is_shared(self) -> bool: # Can't track shared sessions with zip files. 
From a8d0f34ab8432c05c2f606fa666bb525d99b801b Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Thu, 13 Mar 2025 20:38:00 -0400 Subject: [PATCH 08/45] [FIX] Rename some vars, add missing items to interface classes --- bin/dm_xnat_extract.py | 2 +- bin/xnat_fetch_sessions.py | 2 +- datman/importers.py | 205 ++++++++++++++++++++++++++----------- 3 files changed, 148 insertions(+), 61 deletions(-) diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py index 4d77d10d..e0be9226 100755 --- a/bin/dm_xnat_extract.py +++ b/bin/dm_xnat_extract.py @@ -145,7 +145,7 @@ def main(): logger.info(f"Found {len(sessions)} sessions for study {args.study}") for xnat, importer in sessions: - session = datman.scan.Scan(importer._ident, config, + session = datman.scan.Scan(importer.ident, config, bids_root=args.bids_out) if importer.resource_files: diff --git a/bin/xnat_fetch_sessions.py b/bin/xnat_fetch_sessions.py index 05c5b437..6a4a8acb 100755 --- a/bin/xnat_fetch_sessions.py +++ b/bin/xnat_fetch_sessions.py @@ -186,7 +186,7 @@ def update_needed(zip_file, experiment, xnat): zip_scan_uids = get_scan_uids(zip_headers) zip_resources = get_resources(zip_file) - xnat_resources = experiment.get_resources(xnat) + xnat_resources = experiment.get_resource_uris(xnat) if not files_downloaded(zip_resources, xnat_resources) or \ not files_downloaded(zip_scan_uids, experiment.scan_UIDs): diff --git a/datman/importers.py b/datman/importers.py index 4650025a..33049ea9 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -38,6 +38,13 @@ class SessionImporter(ABC): # experiment.resource_IDs (dict of folder names to numerical IDs) # e.g. {'behav': '297528', 'misc': '305312'} + @property + @abstractmethod + def ident(self) -> 'datman.scanid.Identifier': + """A datman identifier for the session. 
+ """ + pass + @property @abstractmethod def name(self) -> str: @@ -67,7 +74,14 @@ def date(self) -> str: @property @abstractmethod def scans(self) -> list['SeriesImporter']: - """A list scan series that belong to the session. + """A list of scan SeriesImporters that belong to the session. + """ + pass + + @property + @abstractmethod + def resource_files(self) -> list[str]: + """A list of relative paths for any resource (non-dcm) files. """ pass @@ -86,6 +100,12 @@ def is_shared(self) -> bool: """ pass + @abstractmethod + def get_files(self, dest_dir, *args, **kwargs): + """Retrieve all of the session's dcm files and place them in dest_dir. + """ + pass + def assign_scan_names(self, config, ident): """Assign a datman style name to each scan in this experiment. @@ -171,8 +191,31 @@ def names(self) -> list[str]: """ pass + @property + @abstractmethod + def image_type(self) -> str: + """The ImageType from the dicom headers. + """ + pass + + @property + @abstractmethod + def uid(self) -> str: + """The UID from the dicom headers. + """ + pass + + @abstractmethod def is_usable(self) -> bool: + """Indicates whether the series contains usable dcm files. + """ + pass + + @abstractmethod + def get_files(self, dest_dir, *args, **kwargs): + """Retrieve dcm files for this series and store them in dest_dir. + """ pass @abstractmethod @@ -180,6 +223,10 @@ def set_datman_name(self, ident: str, tags: 'datman.config.TagInfo' ) -> list[str]: pass + @abstractmethod + def set_tag(self, tag_map): + pass + def _mangle_descr(self) -> str: """Modify a series description to remove non-alphanumeric characters. """ @@ -187,6 +234,16 @@ def _mangle_descr(self) -> str: return "" return re.sub(r"[^a-zA-Z0-9.+]+", "-", self.description) + def is_derived(self): + if not self.image_type: + logger.warning( + f"Image type could not be found for series {self.series}. 
" + "Assuming it's not derived.") + return False + if "DERIVED" in self.image_type: + return True + return False + ############################################################################### #### XNAT classes, formerly in xnat.py @@ -239,7 +296,7 @@ def __init__(self, project, subject_name, experiment_json, self.uid = self._get_field("UID") self.id = self._get_field("ID") self.date = self._get_field("date") - self._ident = ident + self.ident = ident if self.is_shared(): self.name = [label for label in self.get_alt_labels() @@ -264,8 +321,14 @@ def __init__(self, project, subject_name, experiment_json, # Misc - basically just OPT CU1 needs this self.misc_resource_IDs = self._get_other_resource_IDs() - # Use properties here to conform with SessionImporter interface - # and guarantee at creation that expected attributes exist + @property + def ident(self) -> 'datman.scanid.Identifier': + return self._ident + + @ident.setter + def ident(self, value: 'datman.scanid.Identifier'): + self._ident = value + @property def name(self) -> str: return self._name @@ -486,7 +549,7 @@ def get_autorun_ids(self, xnat): return wf_ids - def get_resources(self, xnat_connection): + def get_resource_uris(self, xnat_connection): """ Returns a list of all resource URIs from this session. """ @@ -499,7 +562,7 @@ def get_resources(self, xnat_connection): resources.extend([item["URI"] for item in resource_list]) return resources - def get_files(self, dest_folder, xnat, zip_name=None): + def get_files(self, dest_folder, xnat, *args, zip_name=None, **kwargs): """ Download a zip file containing all data for this session. 
Returns the path to the new file if download is successful, raises an exception if @@ -621,6 +684,14 @@ def names(self) -> list[str]: def names(self, value: list[str]): self._names = value + @property + def uid(self) -> list[str]: + return self._uid + + @uid.setter + def uid(self, value: list[str]): + self._uid = value + def _set_description(self): series_descr = self._get_field("series_description") if series_descr: @@ -645,16 +716,6 @@ def raw_dicoms_exist(self): return True return False - def is_derived(self): - if not self.image_type: - logger.warning( - f"Image type could not be found for series {self.series}. " - "Assuming it's not derived.") - return False - if "DERIVED" in self.image_type: - return True - return False - def set_tag(self, tag_map): matches = {} for tag, pattern in tag_map.items(): @@ -745,7 +806,7 @@ def is_usable(self, strict=False): return True - def get_files(self, output_dir, xnat_conn): + def get_files(self, output_dir, xnat_conn, *args, **kwargs): """Download all dicoms for this series. This will download all files in the series, and if successful, @@ -889,25 +950,23 @@ def __repr__(self): class ZipImporter(SessionImporter): def __init__(self, ident, zip_path): - # Would be good to not need ident here... 
- self._ident = ident - self.path = zip_path + self.ident = ident self.name = zip_path + self.path = zip_path + self.date = self.scans[0].date self.contents = self.parse_contents() self.scans = self.get_scans() - self.resources = self.contents['resources'] - # For compatibility (fix later) - self.resource_files = self.resources + self.resource_files = self.contents['resources'] self.dcm_subdir = os.path.split(self.scans[0].series_dir)[0] - try: - # Convert date to same format XNAT gives - self.date = str(datetime.strptime(self.scans[0].date, "%Y%m%d").date()) - except ValueError: - logger.error("Unexpected date format in dicom header.") - self.date = self.scans[0].date - # Use properties here to conform with SessionImporter interface - # and guarantee at creation that expected attributes exist + @property + def ident(self) -> 'datman.scanid.Identifier': + return self._ident + + @ident.setter + def ident(self, value: 'datman.scanid.Identifier'): + self._ident = value + @property def name(self) -> str: return self._name @@ -918,14 +977,27 @@ def name(self, value: str): @property def source_name(self) -> str: - # When using zip files, can't really track shared IDs so always - # equal name. + # When using zip files, can't really track shared IDs so it always + # equals name. 
return self.name @source_name.setter def source_name(self, value: str): self.name = value + @property + def date(self) -> str: + return self._date + + @date.setter + def date(self, value: str): + try: + # Convert date from usual header format to expected date format + self._date = str(datetime.strptime(value, "%Y%m%d").date()) + except ValueError: + logger.error(f"Unexpected date format given - {value}") + self._date = value + @property def scans(self) -> list['SeriesImporter']: return self._scans @@ -935,12 +1007,12 @@ def scans(self, value: list['SeriesImporter']): self._scans = value @property - def date(self) -> str: - return self._date + def resource_files(self) -> list[str]: + return self._resources - @date.setter - def date(self, value: str): - self._date = value + @resource_files.setter + def resource_files(self, value): + self._resources = value @property def dcm_subdir(self) -> str: @@ -954,7 +1026,7 @@ def is_shared(self) -> bool: # Can't track shared sessions with zip files. return False - def get_files(self, dest_path: str, *args) -> str: + def get_files(self, dest_path: str, *args, **kwargs): """Unpack the zip file at the given location. Args: @@ -965,14 +1037,21 @@ def get_files(self, dest_path: str, *args) -> str: self.extract_resources(dest_path) def get_resources(self, dest_path: str, fname: str = None): + """Unpack resource (non-dicom) files at the given location. + + Args: + dest_path (str): The full path to the location to extract into. + """ with ZipFile(self.path, "r") as fh: if fname: fh.extract(fname, path=dest_path) return - for item in self.resources: + for item in self.resources_files: fh.extract(item, path=dest_path) - def parse_contents(self): + def parse_contents(self) -> dict: + """Read and organize the contents of the zip file. 
+ """ contents = { 'scans': {}, 'resources': [] @@ -991,25 +1070,16 @@ def parse_contents(self): item.filename) return contents - def get_scans(self): - # Headers = dict[rel_path -> pydicom.dataset.FileDataset] + def get_scans(self) -> list['ZipSeriesImporter']: + """Get ZipSeriesImporters for each scan in the session. + """ headers = get_archive_headers(self.path) - # scans = [] - # for sub_path in headers: - # # .get_full_subjectid may need to be changed for compatibility - # scans.append( - # ZipSeriesImporter( - # self.ident.get_full_subjectid(), self.path, sub_path, - # headers[sub_path], self.contents['scans'][sub_path] - # ) - # ) - # return scans scans = {} duplicate_series = set() for sub_path in headers: # .get_full_subjectid may need to be changed for compatibility zip_scan = ZipSeriesImporter( - self._ident.get_full_subjectid(), self.path, sub_path, + self.ident.get_full_subjectid(), self.path, sub_path, headers[sub_path], self.contents['scans'][sub_path] ) if zip_scan.series in scans: @@ -1047,10 +1117,7 @@ def __init__(self, subject, zip_file, series_dir, header, zip_items): self.series = str(header.get('SeriesNumber')) self.description = str(header.get('SeriesDescription')) self.uid = str(header.get('StudyInstanceUID')) - try: - self.image_type = "////".join(header.get("ImageType")) - except TypeError: - self.image_type = "" + self.image_type = header.get("ImageType") self.names = [] self.dcm_dir = None @@ -1096,10 +1163,30 @@ def names(self) -> list[str]: def names(self, value: list[str]): self._names = value + @property + def image_type(self) -> str: + return self._image_type + + @image_type.setter + def image_type(self, value): + try: + # Ensure matches the expected XNAT format + self._image_type = "////".join(value) + except TypeError: + self._image_type = "" + + @property + def uid(self) -> list[str]: + return self._uid + + @uid.setter + def uid(self, value: list[str]): + self._uid = value + def is_usable(self): return any([item.endswith(".dcm") 
for item in self.contents]) - def get_files(self, output_dir: str, *args): + def get_files(self, output_dir: str, *args, **kwargs): with ZipFile(self.zip_file, "r") as fh: for item in self.contents: fh.extract(item, path=output_dir) From ac8cb2054c3c503125f641709871351b0a6ad6eb Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Thu, 13 Mar 2025 21:29:28 -0400 Subject: [PATCH 09/45] [CONF] Update pylint settings to ignore more messages --- pyproject.toml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 65a395a4..7ed546dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,7 +109,12 @@ testpaths = ["tests"] fail-under = 7 ignore-paths = ['docs'] +[tool.pylint.logging] +logging-format-style = "new" + [tool.pylint."messages control"] disable = [ - "logging-format-interpolation" + "logging-format-interpolation", + "too-many-lines", + "logging-fstring-interpolation" ] From e7e175da87cae0cc63bf2bdc616c16d1b71727ed Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 14 Mar 2025 21:16:24 -0400 Subject: [PATCH 10/45] [PEP8] Fix formatting issues --- bin/dm_xnat_extract.py | 138 ++++++++++++-- bin/dm_xnat_upload.py | 7 +- bin/xnat_fetch_sessions.py | 6 +- datman/importers.py | 308 ++++++++++++++++-------------- datman/xnat.py | 375 ++++++++++++++++++------------------- pyproject.toml | 9 +- 6 files changed, 498 insertions(+), 345 deletions(-) diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py index e0be9226..c672d311 100755 --- a/bin/dm_xnat_extract.py +++ b/bin/dm_xnat_extract.py @@ -50,7 +50,6 @@ import platform import shutil import sys -from zipfile import BadZipFile import datman.config import datman.exceptions @@ -160,6 +159,8 @@ def main(): def read_args(): + """Configure the ArgumentParser. 
+ """ def _is_dir(path, parser): """Ensure a given directory exists.""" if path is None or not os.path.isdir(path): @@ -316,6 +317,12 @@ def get_log_level(args): def configure_logging(study, log_level): + """Configure the logging for this run. + + Args: + study (:obj:`str`): The name of the study being exported. + log_level (:obj:`str`): The log level to use. + """ ch = logging.StreamHandler(sys.stdout) log_level = getattr(logging, log_level) @@ -332,9 +339,23 @@ def configure_logging(study, log_level): logging.getLogger('datman.dashboard').addHandler(ch) logging.getLogger('datman.xnat').addHandler(ch) logging.getLogger('datman.exporters').addHandler(ch) + logging.getLogger('datman.importers').addHandler(ch) def get_sessions(config, args): + """Get all scan sessions to be exported. + + Args: + config (:obj:`datman.config.config`): The datman configuration. + args (:obj:`argparse.ArgumentParser`): The argument parser for the + user's input arguments. + + Returns: + list[(None|datman.xnat.XNAT, datman.importers.SessionImporter)]: + a list of tuples containing the XNAT connection to use (if needed + during export) and a SessionImporter. If no sessions are found, + will return an empty list. + """ if args.use_zips != "USE_XNAT": return collect_zips(config, args) @@ -348,6 +369,19 @@ def get_sessions(config, args): def collect_zips(config, args): + """Locate all usable zip files. + + Args: + config (:obj:`datman.config.config`): The datman configuration. + args (:obj:`argparse.ArgumentParser`): The argument parser for the + user's command line inputs. + + Returns: + list[(None, datman.importers.ZipImporter)]: A list of tuples each + containing None (for compatibility with exporting XNATExperiments) + and a ZipImporter. Will return an empty list if no zip files are + found.
+ """ if args.use_zips is None: zip_folder = config.get_path("dicom") else: @@ -386,6 +420,24 @@ def collect_zips(config, args): def collect_experiment(config, experiment_id, study, url=None, auth=None): + """Get a single XNAT experiment. + + Args: + config (:obj:`datman.config.config`): A datman configuration object. + experiment_id (:obj:`str`): An XNAT experiment ID. + study (:obj:`str`): A valid study ID. + url (:obj:`str`, optional): The XNAT url to use. If not given, it + will be retrieved from the configuration files. + auth (:obj:`tuple`, optional): A tuple containing the username and + password to use when accessing the XNAT server. If not given, + the XNAT_USER and XNAT_PASS environment variables will be used. + + Return: + list[(datman.xnat.XNAT, datman.importers.XNATExperiment)]: + a list with a single tuple containing the xnat connection to use + and the experiment importer. If not found, an empty list will be + given. + """ ident = get_identifier(config, experiment_id) xnat = datman.xnat.get_connection( config, site=ident.site, url=url, auth=auth) @@ -407,6 +459,17 @@ def collect_experiment(config, experiment_id, study, url=None, auth=None): def get_identifier(config, subid): + """Get a valid identifier for a given ID. + + Args: + config (:obj:`datman.config.config`): A datman configuration object + for a study. + subid (:obj:`str`): A valid identifier in one of datman's accepted name + conventions. + + Returns: + datman.scanid.Identifier: A datman Identifier for the given subid. + """ ident = validate_subject_id(subid, config) try: @@ -425,6 +488,20 @@ def get_identifier(config, subid): def collect_all_experiments(config, auth=None, url=None): + """Retrieve all XNAT experiment objects for a single study. + + Args: + config (:obj:`datman.config.config`): A datman configuration object + for the current study. + auth (:obj:`tuple`, optional): A tuple containing an XNAT username and + password. 
If not provided, the XNAT_USER and XNAT_PASS variables + will be used. Defaults to None. + url (:obj:`str`, optional): The URL for the XNAT server. + + Returns: + list[datman.importers.XNATExperiment]: A list of XNATExperiment + importers for all experiments belonging to the config's study. + """ experiments = [] server_cache = {} @@ -446,21 +523,31 @@ def collect_all_experiments(config, auth=None, url=None): def get_experiment_identifier(config, project, experiment_id): + """Get a valid datman identifier for an experiment found on XNAT. + + Args: + config (:obj:`datman.config.config`): A datman configuration object. + project (:obj:`str`): The name of a project on XNAT. + experiment_id (:obj:`str`): The name of an experiment found on XNAT. + + Returns: + :obj:`datman.scanid.Identifier` or None if experiment_id is invalid. + """ try: ident = validate_subject_id(experiment_id, config) except datman.scanid.ParseException: logger.error(f"Invalid XNAT experiment ID {experiment_id} in project " f"{project}. Please update XNAT with correct ID.") - return + return None if ident.session is None and not datman.scanid.is_phantom(ident): logger.error(f"Invalid experiment ID {experiment_id} in project " f"{project}. Reason - Not a phantom, but missing session " "number") - return + return None if ident.modality != "MR": - return + return None return ident @@ -488,6 +575,18 @@ def get_projects(config): def get_xnat_experiment(xnat, project, ident): + """Retrieve information about an XNAT experiment. + + Args: + xnat (:obj:`datman.xnat.XNAT`): A connection to an XNAT server. + project (:obj:`str`): The name of the XNAT project the experiment + belongs to. + ident (:obj:`datman.scanid.Identifier`): A datman identifier for the + experiment. + + Returns: + :obj:`datman.importers.XNATExperiment` or None if not found.
+ """ experiment_label = ident.get_xnat_experiment_id() logger.info(f"Retrieving experiment: {experiment_label}") @@ -499,11 +598,22 @@ def get_xnat_experiment(xnat, project, ident): except Exception as e: logger.error(f"Unable to retrieve experiment {experiment_label} from " f"XNAT server. {type(e).__name__}: {e}") - return + return None return xnat_experiment def export_resources(resource_dir, xnat, importer, dry_run=False): + """Export all resource (non-dicom) files for a scan session. + + Args: + resource_dir (:obj:`str`): The absolute path to where resources + should be exported. + xnat (:obj:`datman.xnat.XNAT`): A connection to an XNAT server. + importer (:obj:`datman.importers.SessionImporter`): An importer for + the scan session to export resources for. + dry_run (bool, optional): Report changes that would be made without + modifying anything. Defaults to False. + """ logger.info(f"Extracting {len(importer.resource_files)} resources " f"from {importer.name}") @@ -523,7 +633,7 @@ def export_resources(resource_dir, xnat, importer, dry_run=False): xnat_experiment = importer - for label in xnat_experiment.resource_IDs: + for label in xnat_experiment.resource_ids: if label == "No Label": target_path = os.path.join(resource_dir, "MISC") else: @@ -535,7 +645,7 @@ def export_resources(resource_dir, xnat, importer, dry_run=False): logger.error(f"Failed creating target folder: {target_path}") continue - xnat_resource_id = xnat_experiment.resource_IDs[label] + xnat_resource_id = xnat_experiment.resource_ids[label] try: resources = xnat.get_resource_list(xnat_experiment.project, @@ -575,7 +685,7 @@ def download_resource(xnat, xnat_experiment, xnat_resource_id, if dry_run: logger.info(f"DRY RUN: Skipping download of {xnat_resource_uri} to " f"{target_path}") - return + return None try: source = xnat.get_resource(xnat_experiment.project, @@ -587,7 +697,7 @@ def download_resource(xnat, xnat_experiment, xnat_resource_id, except Exception as e: logger.error("Failed 
downloading resource archive from " f"{xnat_experiment.name} with reason: {e}") - return + return None # check that the target path exists target_dir = os.path.split(target_path)[0] @@ -596,7 +706,7 @@ def download_resource(xnat, xnat_experiment, xnat_resource_id, os.makedirs(target_dir) except OSError: logger.error(f"Failed to create directory: {target_dir}") - return + return None # copy the downloaded file to the target location try: @@ -700,6 +810,7 @@ def make_session_exporters(config, session, experiment, bids_opts=None, exporters = [] for exp_format in formats: + # pylint: disable-next=invalid-name Exporter = datman.exporters.get_exporter(exp_format, scope="session") exporters.append( Exporter(config, session, experiment, bids_opts=bids_opts, @@ -771,6 +882,8 @@ def make_all_series_exporters(config, session, experiment, bids_opts=None, def get_tag_settings(config, site): + """Get configuration for all tags defined for a specific site. + """ try: tags = config.get_tags(site=site) except datman.exceptions.UndefinedSetting: @@ -819,6 +932,7 @@ def make_series_exporters(session, scan, tag_config, config, wanted_tags=None, logger.debug(f"Found export formats {formats} for {scan}") for exp_format in formats: + # pylint: disable-next=invalid-name Exporter = datman.exporters.get_exporter( exp_format, scope="series") @@ -855,14 +969,14 @@ def is_blacklisted(scan_name, config): def needs_raw(session_exporters): """Returns true if raw data is needed to run any session exporters. """ - return any([exp.needs_raw_data() for exp in session_exporters]) + return any(exp.needs_raw_data() for exp in session_exporters) def needs_export(session_exporters): """Returns True if any session exporters need to be run. """ try: - return any([not exp.outputs_exist() for exp in session_exporters]) + return any(not exp.outputs_exist() for exp in session_exporters) except ValueError: # ValueError is raised when an invalid series number exists on XNAT. 
# Skip these sessions diff --git a/bin/dm_xnat_upload.py b/bin/dm_xnat_upload.py index c9eef59b..ef3c6686 100755 --- a/bin/dm_xnat_upload.py +++ b/bin/dm_xnat_upload.py @@ -277,8 +277,9 @@ def resource_data_exists(xnat_resources, archive): if zf.read(item)] empty_files = list(set(local_resources) - set(local_resources_mod)) if empty_files: - logger.warning("Cannot upload empty resource files {}, omitting." - "".format(", ".join(empty_files))) + logger.warning( + f"Cannot upload empty resource files {', '.join(empty_files)}, " + "omitting.") # paths in xnat are url encoded. Need to fix local paths to match local_resources_mod = [urllib.request.pathname2url(p) for p in local_resources_mod] @@ -299,7 +300,7 @@ def scan_data_exists(xnat_experiment, local_headers): if xnat_experiment.uid not in local_experiment_ids: raise ValueError("Experiment UID doesnt match XNAT") - if not set(local_scan_uids).issubset(set(xnat_experiment.scan_UIDs)): + if not set(local_scan_uids).issubset(set(xnat_experiment.scan_uids)): logger.info("Found UIDs for {} not yet added to xnat".format( xnat_experiment.name)) return False diff --git a/bin/xnat_fetch_sessions.py b/bin/xnat_fetch_sessions.py index 6a4a8acb..cc7a8263 100755 --- a/bin/xnat_fetch_sessions.py +++ b/bin/xnat_fetch_sessions.py @@ -85,7 +85,7 @@ def main(): logger.setLevel(logging.ERROR) if not study: - with datman.xnat.xnat(xnat_server, username, password) as xnat: + with datman.xnat.XNAT(xnat_server, username, password) as xnat: download_subjects(xnat, xnat_project, destination) return @@ -104,7 +104,7 @@ def main(): logger.error("{}".format(e)) continue username, password = get_credentials(credentials_file) - with datman.xnat.xnat(server, username, password) as xnat: + with datman.xnat.XNAT(server, username, password) as xnat: download_subjects(xnat, project, destination) @@ -189,7 +189,7 @@ def update_needed(zip_file, experiment, xnat): xnat_resources = experiment.get_resource_uris(xnat) if not 
files_downloaded(zip_resources, xnat_resources) or \ - not files_downloaded(zip_scan_uids, experiment.scan_UIDs): + not files_downloaded(zip_scan_uids, experiment.scan_uids): logger.error("Some of XNAT contents for {} is missing from file " "system. Zip file will be deleted and recreated" "".format(experiment.name)) diff --git a/datman/importers.py b/datman/importers.py index 33049ea9..c6d0bb0f 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -4,6 +4,7 @@ uses these classes to create a uniform interface for its exporters, which create the files and database contents users may actually interact with. """ + from abc import ABC, abstractmethod from datetime import datetime import glob @@ -12,9 +13,9 @@ import os import re import shutil -from zipfile import ZipFile +from zipfile import ZipFile, BadZipFile -from datman.exceptions import ParseException, XnatException, InputException +from datman.exceptions import ParseException, XnatException from datman.utils import is_dicom, get_archive_headers @@ -22,35 +23,20 @@ class SessionImporter(ABC): - - # Exporters currently use these from XNATExperiment: - # experiment.name - # experiment.source_name (related to sharing data) - # experiment.scans - # experiment.date - # experiment.is_shared() - - # Missed but possibly needed attributes (from extract): - # experiment.assign_scan_names(config, ident) - # - # Maybe we really just need a resource exporter class... - # experiment.resource_files (list of dicts) - # experiment.resource_IDs (dict of folder names to numerical IDs) - # e.g. {'behav': '297528', 'misc': '305312'} + """An interface for importing a whole scan session into datman. + """ @property @abstractmethod def ident(self) -> 'datman.scanid.Identifier': """A datman identifier for the session. """ - pass @property @abstractmethod def name(self) -> str: """A valid ID for the scan session being imported. 
""" - pass @property @abstractmethod @@ -62,28 +48,24 @@ def source_name(self) -> str: corresponds to it's original ID. This will be equal to 'name' when the session is not shared or sharing is not being tracked. """ - pass @property @abstractmethod def date(self) -> str: """A string representation (YYYY-MM-DD) of the scan collection date. """ - pass @property @abstractmethod def scans(self) -> list['SeriesImporter']: """A list of scan SeriesImporters that belong to the session. """ - pass @property @abstractmethod def resource_files(self) -> list[str]: """A list of relative paths for any resource (non-dcm) files. """ - pass @property @abstractmethod @@ -92,19 +74,16 @@ def dcm_subdir(self) -> str: This will be a relative path, and will always be defined. """ - pass @abstractmethod def is_shared(self) -> bool: """Indicates whether the session is shared with other projects. """ - pass @abstractmethod - def get_files(self, dest_dir, *args, **kwargs): + def get_files(self, dest_dir: str, *args, **kwargs): """Retrieve all of the session's dcm files and place them in dest_dir. """ - pass def assign_scan_names(self, config, ident): """Assign a datman style name to each scan in this experiment. @@ -128,7 +107,7 @@ def assign_scan_names(self, config, ident): for scan in self.scans: try: scan.set_datman_name(str(ident), tags) - except Exception as e: + except (ParseException, TypeError, KeyError) as e: logger.info( f"Failed to make file name for series {scan.series} " f"in session {str(ident)}. Reason {type(e).__name__}: " @@ -136,16 +115,9 @@ def assign_scan_names(self, config, ident): class SeriesImporter(ABC): - # XNATScan attributes and methods used by exporters... 
- # .series - # .subject (FakeSideCar needs) - # .names - # .description - - # MISSED (may have missed more in dm_xnat_extract): - # scan.download_dir - # xnat copy for example points to: /scratch/dawn/temp_stuff/export_zip/xnat_copy/SPN10_CMH_0083_01_SE01_MR/scans/6-t1_mprage_T1_900/resources/DICOM/files - # unzipped copy would be (diff session): 20190116_Ex09352_ASND1MR_ASQB002/Ex09352_Se00003_SagT1Bravo-1mm-32ch/ + """An interface for importing a single dcm series into datman. + """ + @property @abstractmethod def dcm_dir(self) -> str: @@ -154,7 +126,6 @@ def dcm_dir(self) -> str: This should be None if the dicoms have not been retrieved from their source location (e.g. with get_files). """ - pass @property @abstractmethod @@ -164,7 +135,6 @@ def series(self) -> str: This should be a string because sometimes the 'number' comes with non-numeric prefixes or postfixes (e.g. on XNAT in some circumstances). """ - pass @property @abstractmethod @@ -175,57 +145,51 @@ def subject(self) -> str: truncated or extended version of it as subject may be to experiment on XNAT). """ - pass @property @abstractmethod def description(self) -> str: """The series description (as from the dicom headers). """ - pass @property @abstractmethod def names(self) -> list[str]: """A list of valid scan names that may be applied to this series. """ - pass @property @abstractmethod def image_type(self) -> str: """The ImageType from the dicom headers. """ - pass @property @abstractmethod def uid(self) -> str: """The UID from the dicom headers. """ - pass - @abstractmethod def is_usable(self) -> bool: """Indicates whether the series contains usable dcm files. """ - pass @abstractmethod def get_files(self, dest_dir, *args, **kwargs): """Retrieve dcm files for this series and store them in dest_dir. 
""" - pass @abstractmethod - def set_datman_name(self, ident: str, tags: 'datman.config.TagInfo' - ) -> list[str]: - pass + def set_datman_name(self, base_name: str, tags: 'datman.config.TagInfo' + ) -> list[str]: + """Construct a datman-style name for the scan. + """ @abstractmethod def set_tag(self, tag_map): - pass + """Set the scan tag for the scan. + """ def _mangle_descr(self) -> str: """Modify a series description to remove non-alphanumeric characters. @@ -234,7 +198,9 @@ def _mangle_descr(self) -> str: return "" return re.sub(r"[^a-zA-Z0-9.+]+", "-", self.description) - def is_derived(self): + def is_derived(self) -> bool: + """Check if the scan is derived or primary. + """ if not self.image_type: logger.warning( f"Image type could not be found for series {self.series}. " @@ -245,24 +211,42 @@ def is_derived(self): return False -############################################################################### -#### XNAT classes, formerly in xnat.py +class XNATObject(ABC): + """A meta class for classes that manage XNAT contents. + """ + @property + @abstractmethod + def raw_json(self) -> dict: + """The json for the XNAT entity. + """ -class XNATObject(ABC): - def _get_field(self, key): + def get_field(self, key): + """Get an item from an XNAT object's data fields. + """ if not self.raw_json.get("data_fields"): return "" return self.raw_json["data_fields"].get(key, "") class XNATSubject(XNATObject): + """An XNAT subject, which may hold one or more experiments. 
+ """ + def __init__(self, subject_json): self.raw_json = subject_json - self.name = self._get_field("label") - self.project = self._get_field("project") + self.name = self.get_field("label") + self.project = self.get_field("project") self.experiments = self._get_experiments() + @property + def raw_json(self) -> dict: + return self._json + + @raw_json.setter + def raw_json(self, value): + self._json = value + def _get_experiments(self): experiments = [ exp for exp in self.raw_json["children"] @@ -288,22 +272,25 @@ def __repr__(self): class XNATExperiment(SessionImporter, XNATObject): + """An XNAT experiment which may hold scan data and resource files. + """ + def __init__(self, project, subject_name, experiment_json, ident=None): self.raw_json = experiment_json self.project = project self.subject = subject_name - self.uid = self._get_field("UID") - self.id = self._get_field("ID") - self.date = self._get_field("date") + self.uid = self.get_field("UID") + self.id = self.get_field("ID") + self.date = self.get_field("date") self.ident = ident if self.is_shared(): self.name = [label for label in self.get_alt_labels() if self.subject in label][0] - self.source_name = self._get_field("label") + self.source_name = self.get_field("label") else: - self.name = self._get_field("label") + self.name = self.get_field("label") self.source_name = self.name # The subdirectory to find the dicoms in after download @@ -311,15 +298,23 @@ def __init__(self, project, subject_name, experiment_json, # Scan attributes self.scans = self._get_scans() - self.scan_UIDs = self._get_scan_UIDs() - self.scan_resource_IDs = self._get_scan_rIDs() + self.scan_uids = self._get_scan_uids() + self.scan_resource_ids = self._get_scan_rids() # Resource attributes self.resource_files = self._get_contents("resources/resource") - self.resource_IDs = self._get_resource_IDs() + self.resource_ids = self._get_resource_ids() # Misc - basically just OPT CU1 needs this - self.misc_resource_IDs = 
self._get_other_resource_IDs() + self.misc_resource_ids = self._get_other_resource_ids() + + @property + def raw_json(self) -> dict: + return self._json + + @raw_json.setter + def raw_json(self, value): + self._json = value @property def ident(self) -> 'datman.scanid.Identifier': @@ -361,6 +356,14 @@ def date(self) -> str: def date(self, value: str): self._date = value + @property + def resource_files(self) -> list[str]: + return self._resource_files + + @resource_files.setter + def resource_files(self, value): + self._resource_files = value + @property def dcm_subdir(self) -> str: return self._dcm_subdir @@ -387,10 +390,10 @@ def _get_scans(self): xnat_scans.append(XNATScan(self, scan_json)) return xnat_scans - def _get_scan_UIDs(self): + def _get_scan_uids(self): return [scan.uid for scan in self.scans] - def _get_scan_rIDs(self): + def _get_scan_rids(self): # These can be used to download a series from xnat resource_ids = [] for scan in self.scans: @@ -408,7 +411,7 @@ def _get_scan_rIDs(self): resource_ids.append(str(r_id)) return resource_ids - def _get_resource_IDs(self): + def _get_resource_ids(self): if not self.resource_files: return {} @@ -419,7 +422,7 @@ def _get_resource_IDs(self): resource["data_fields"]["xnat_abstractresource_id"]) return resource_ids - def _get_other_resource_IDs(self): + def _get_other_resource_ids(self): """ OPT's CU site uploads niftis to their server. These niftis are neither classified as resources nor as scans so our code misses them entirely. @@ -485,7 +488,7 @@ def get_autorun_ids(self, xnat): XnatException: If no AutoRun.xml pipeline instance is found or the API response can't be parsed. 
""" - query_xml = """ + query_xml = f""" wrk:workflowData/ID LIKE - {exp_id} + {self.id} wrk:workflowData/ExternalID = - {project} + {self.project} wrk:workflowData/pipeline_name @@ -527,18 +530,18 @@ def get_autorun_ids(self, xnat): - """.format(exp_id=self.id, project=self.project) # noqa: E501 + """ # noqa: E501 query_url = f"{xnat.server}/data/search?format=json" - response = xnat._make_xnat_post(query_url, data=query_xml) + response = xnat.make_xnat_post(query_url, data=query_xml) if not response: raise XnatException("AutoRun.xml pipeline not found.") try: found_pipelines = json.loads(response) - except json.JSONDecodeError: - raise XnatException("Can't decode workflow query response.") + except json.JSONDecodeError as e: + raise XnatException("Can't decode workflow query response.") from e try: results = found_pipelines["ResultSet"]["Result"] @@ -554,31 +557,32 @@ def get_resource_uris(self, xnat_connection): Returns a list of all resource URIs from this session. """ resources = [] - resource_ids = list(self.resource_IDs.values()) - resource_ids.extend(self.misc_resource_IDs) + resource_ids = list(self.resource_ids.values()) + resource_ids.extend(self.misc_resource_ids) for r_id in resource_ids: resource_list = xnat_connection.get_resource_list( self.project, self.subject, self.name, r_id) resources.extend([item["URI"] for item in resource_list]) return resources - def get_files(self, dest_folder, xnat, *args, zip_name=None, **kwargs): + # pylint: disable-next=arguments-differ + def get_files(self, dest_dir, xnat, *args, zip_name=None, **kwargs): """ Download a zip file containing all data for this session. 
Returns the path to the new file if download is successful, raises an exception if not Args: - dest_folder: The absolute path to the folder where the zip + dest_dir: The absolute path to the folder where the zip should be deposited - xnat: An instance of datman.xnat.xnat() + xnat: An instance of datman.xnat.XNAT() zip_name: An optional name for the output zip file. If not set the zip name will be session.name """ - resources_list = list(self.scan_resource_IDs) - resources_list.extend(self.misc_resource_IDs) - resources_list.extend(self.resource_IDs) + resources_list = list(self.scan_resource_ids) + resources_list.extend(self.misc_resource_ids) + resources_list.extend(self.resource_ids) if not resources_list: raise ValueError(f"No scans or resources found for {self.name}") @@ -590,13 +594,13 @@ def get_files(self, dest_folder, xnat, *args, zip_name=None, **kwargs): if not zip_name: zip_name = self.name.upper() + ".zip" - output_path = os.path.join(dest_folder, zip_name) + output_path = os.path.join(dest_dir, zip_name) if os.path.exists(output_path): logger.error( f"Cannot download {output_path}, file already exists.") return output_path - xnat._get_xnat_stream(url, output_path) + xnat.get_xnat_stream(url, output_path) return output_path @@ -607,7 +611,7 @@ def is_shared(self) -> bool: if not alt_names: return False - return any([self.subject in label for label in alt_names]) + return any(self.subject in label for label in alt_names) def get_alt_labels(self): """Find the names for all shared copies of the XNAT experiment. @@ -625,27 +629,38 @@ def __repr__(self): class XNATScan(SeriesImporter, XNATObject): + """A single XNAT series. 
+ """ + def __init__(self, experiment, scan_json): + self.raw_json = scan_json self.project = experiment.project self.subject = experiment.subject self.experiment = experiment.name self.shared = experiment.is_shared() self.source_experiment = experiment.source_name self.raw_json = scan_json - self.uid = self._get_field("UID") - self.series = self._get_field("ID") - self.image_type = self._get_field("parameters/imageType") + self.uid = self.get_field("UID") + self.series = self.get_field("ID") + self.image_type = self.get_field("parameters/imageType") self.multiecho = self.is_multiecho() self.description = self._set_description() - self.type = self._get_field("type") + self.type = self.get_field("type") self.names = [] + self.echo_dict = {} # Will remain empty unless scan is multi-echo self.tags = [] self.dcm_dir = None - # Use properties here to conform with SeriesImporter interface - # and guarantee at creation that expected attributes exist @property - def dcm_dir(self) ->str: + def raw_json(self) -> dict: + return self._json + + @raw_json.setter + def raw_json(self, value): + self._json = value + + @property + def dcm_dir(self) -> str: return self._dcm_dir @dcm_dir.setter @@ -684,6 +699,14 @@ def names(self) -> list[str]: def names(self, value: list[str]): self._names = value + @property + def image_type(self) -> str: + return self._image_type + + @image_type.setter + def image_type(self, value): + self._image_type = value + @property def uid(self) -> list[str]: return self._uid @@ -693,12 +716,14 @@ def uid(self, value: list[str]): self._uid = value def _set_description(self): - series_descr = self._get_field("series_description") + series_descr = self.get_field("series_description") if series_descr: return series_descr - return self._get_field("type") + return self.get_field("type") def is_multiecho(self): + """Check if the series is multiecho. 
+ """ try: child = self.raw_json["children"][0]["items"][0] except (KeyError, IndexError): @@ -709,6 +734,8 @@ def is_multiecho(self): return False def raw_dicoms_exist(self): + """Check if any dicom files exist for the scan. + """ for child in self.raw_json["children"]: for item in child["items"]: file_type = item["data_fields"].get("content") @@ -747,7 +774,8 @@ def _set_fmap_tag(self, tag_map, matches): if tag in matches: if not re.search(pattern["ImageType"], self.image_type): del matches[tag] - except Exception: + except (re.error, TypeError) as e: + logger.error(f"Error applying FMAP tags: {e}. Ignoring tag.") matches = {} if len(matches) > 2 or (len(matches) == 2 and not self.multiecho): @@ -759,15 +787,17 @@ def set_datman_name(self, base_name, tags): mangled_descr = self._mangle_descr() padded_series = self.series.zfill(2) tag_settings = self.set_tag(tags.series_map) + if not tag_settings: raise ParseException( f"Can't identify tag for series {self.series}") + names = [] self.echo_dict = {} - for tag in tag_settings: + for tag, settings in tag_settings.items(): name = "_".join([base_name, tag, padded_series, mangled_descr]) if self.multiecho: - echo_num = tag_settings[tag]["EchoNumber"] + echo_num = settings["EchoNumber"] if echo_num not in self.echo_dict: self.echo_dict[echo_num] = name names.append(name) @@ -806,14 +836,15 @@ def is_usable(self, strict=False): return True - def get_files(self, output_dir, xnat_conn, *args, **kwargs): + # pylint: disable-next=arguments-differ + def get_files(self, dest_dir, xnat_conn, *args, **kwargs): """Download all dicoms for this series. This will download all files in the series, and if successful, set the dcm_dir attribute to the destination folder. Args: - output_dir (:obj:`str`): The full path to the location to + dest_dir (:obj:`str`): The full path to the location to download all files to. xnat_conn (:obj:`datman.xnat.xnat`): An open xnat connection to the server to download from. 
@@ -832,7 +863,7 @@ def get_files(self, output_dir, xnat_conn, *args, **kwargs): try: dicom_zip = xnat_conn.get_dicom(self.project, self.subject, self.experiment, self.series) - except Exception as e: + except XnatException as e: logger.error(f"Failed to download dicom archive for {self.subject}" f" series {self.series}. Reason - {e}") return False @@ -849,21 +880,21 @@ def get_files(self, output_dir, xnat_conn, *args, **kwargs): try: with ZipFile(dicom_zip, "r") as fh: - fh.extractall(output_dir) - except Exception as e: + fh.extractall(dest_dir) + except (BadZipFile, PermissionError) as e: logger.error("An error occurred unpacking dicom archive for " f"{self.experiment}'s series {self.series}' - {e}") os.remove(dicom_zip) return False - else: - logger.info("Unpacking complete. Deleting archive file " - f"{dicom_zip}") - os.remove(dicom_zip) + + logger.info("Unpacking complete. Deleting archive file " + f"{dicom_zip}") + os.remove(dicom_zip) if self.shared: - self._fix_download_name(output_dir) + self._fix_download_name(dest_dir) - dicom_file = self._find_first_dicom(output_dir) + dicom_file = self._find_first_dicom(dest_dir) try: self.dcm_dir = os.path.dirname(dicom_file) @@ -884,11 +915,12 @@ def _find_first_dicom(self, dcm_dir): exist in the folder. """ search_dir = self._find_series_dir(dcm_dir) - for root_dir, folder, files in os.walk(search_dir): + for root_dir, _, files in os.walk(search_dir): for item in files: path = os.path.join(root_dir, item) if is_dicom(path): return path + return None def _find_series_dir(self, search_dir): """Find the directory a series was downloaded to, if any. @@ -943,11 +975,9 @@ def __repr__(self): return self.__str__() -############################################################################# -# Zip file classes - - class ZipImporter(SessionImporter): + """A zip file to be managed by datman. 
+ """ def __init__(self, ident, zip_path): self.ident = ident @@ -1026,28 +1056,28 @@ def is_shared(self) -> bool: # Can't track shared sessions with zip files. return False - def get_files(self, dest_path: str, *args, **kwargs): + def get_files(self, dest_dir: str, *args, **kwargs): """Unpack the zip file at the given location. Args: - dest_path (str): The full path to the location to extract into. + dest_dir (str): The full path to the location to extract into. """ for item in self.scans: - item.get_files(dest_path) - self.extract_resources(dest_path) + item.get_files(dest_dir) + self.get_resources(dest_dir) - def get_resources(self, dest_path: str, fname: str = None): + def get_resources(self, dest_dir: str, fname: str = None): """Unpack resource (non-dicom) files at the given location. Args: - dest_path (str): The full path to the location to extract into. + dest_dir (str): The full path to the location to extract into. """ with ZipFile(self.path, "r") as fh: if fname: - fh.extract(fname, path=dest_path) + fh.extract(fname, path=dest_dir) return - for item in self.resources_files: - fh.extract(item, path=dest_path) + for item in self.resource_files: + fh.extract(item, path=dest_dir) def parse_contents(self) -> dict: """Read and organize the contents of the zip file. @@ -1076,11 +1106,11 @@ def get_scans(self) -> list['ZipSeriesImporter']: headers = get_archive_headers(self.path) scans = {} duplicate_series = set() - for sub_path in headers: + for sub_path, header in headers.items(): # .get_full_subjectid may need to be changed for compatibility zip_scan = ZipSeriesImporter( self.ident.get_full_subjectid(), self.path, sub_path, - headers[sub_path], self.contents['scans'][sub_path] + header, self.contents['scans'][sub_path] ) if zip_scan.series in scans: duplicate_series.add(zip_scan.series) @@ -1106,7 +1136,10 @@ def __repr__(self): class ZipSeriesImporter(SeriesImporter): + """A single scan series from a zip file to be managed by datman. 
+ """ + # pylint: disable-next=too-many-arguments,too-many-positional-arguments def __init__(self, subject, zip_file, series_dir, header, zip_items): self.subject = subject self.zip_file = zip_file @@ -1119,10 +1152,9 @@ def __init__(self, subject, zip_file, series_dir, header, zip_items): self.uid = str(header.get('StudyInstanceUID')) self.image_type = header.get("ImageType") self.names = [] + self.tags = [] self.dcm_dir = None - # Use properties here to conform with SeriesImporter interface - # and guarantee at creation that expected attributes exist @property def dcm_dir(self) -> str: return self._dcm_dir @@ -1184,16 +1216,16 @@ def uid(self, value: list[str]): self._uid = value def is_usable(self): - return any([item.endswith(".dcm") for item in self.contents]) + return any(item.endswith(".dcm") for item in self.contents) - def get_files(self, output_dir: str, *args, **kwargs): + def get_files(self, dest_dir: str, *args, **kwargs): with ZipFile(self.zip_file, "r") as fh: for item in self.contents: - fh.extract(item, path=output_dir) - self.dcm_dir = os.path.join(output_dir, self.series_dir) + fh.extract(item, path=dest_dir) + self.dcm_dir = os.path.join(dest_dir, self.series_dir) def set_datman_name(self, base_name: str, tags: 'datman.config.TagInfo' - ) -> list[str]: + ) -> list[str]: mangled_descr = self._mangle_descr() tag_settings = self.set_tag(tags.series_map) if not tag_settings: @@ -1224,10 +1256,12 @@ def set_tag(self, tag_map): matches[tag] = pattern if (len(matches) == 1 or - all(['EchoNumber' in matches[tag] for tag in matches])): + all('EchoNumber' in conf for conf in matches.values())): self.tags = list(matches.keys()) return matches + return {} + def __str__(self): return f"" diff --git a/datman/xnat.py b/datman/xnat.py index 3cfdc7e1..debc93b8 100644 --- a/datman/xnat.py +++ b/datman/xnat.py @@ -11,13 +11,28 @@ import requests from datman.exceptions import UndefinedSetting, XnatException, InputException -from datman.importers import XNATSubject, 
XNATExperiment, XNATScan +from datman.importers import XNATSubject, XNATExperiment logger = logging.getLogger(__name__) -def get_server(config=None, url=None, port=None): +def get_server(config: 'datman.config.config' = None, + url: str = None, + port: str = None): + """Get correctly formatted XNAT server URL. + + Args: + config (:obj:`datman.config.config`, optional): A datman configuration + object. Must be provided if url argument is not given. + url (:obj:`str`, optional): A server url to use (and possibly + re-adjust). Must be provided if config argument is not given. + port (:obj:`str`, optional): A string representation of a port to use + instead of traditional http/https ports. + + Returns: + str: A server url of the expected format. + """ if not config and not url: raise XnatException("Can't construct a valid server URL without a " "datman.config.config instance or string url") @@ -74,34 +89,47 @@ def get_port_str(config=None, port=None): def get_auth(username=None, file_path=None): + """Retrieve username and password for XNAT. + + If no inputs are given then the environment variables XNAT_USER and + XNAT_PASS will be used. + + Args: + username (:obj:`str`, optional): A username to use. If given, the + user will be prompted for a password. + file_path (:obj:`str`, optional): A path to a credentials file. + + Returns: + tuple(str, str): A tuple containing a username and password. + """ if username: return (username, getpass.getpass()) if file_path: try: - with open(file_path, "r") as cred_file: + with open(file_path, "r", encoding="utf-8") as cred_file: contents = cred_file.readlines() except Exception as e: raise XnatException( f"Failed to read credentials file {file_path}. 
" - f"Reason - {e}") + f"Reason - {e}") from e try: username = contents[0].strip() password = contents[1].strip() - except IndexError: + except IndexError as e: raise XnatException( f"Failed to read credentials file {file_path} - " - "incorrectly formatted.") + "incorrectly formatted.") from e return (username, password) try: username = os.environ["XNAT_USER"] except KeyError: - raise KeyError("XNAT_USER not defined in environment") + raise KeyError("XNAT_USER not defined in environment") from None try: password = os.environ["XNAT_PASS"] except KeyError: - raise KeyError("XNAT_PASS not defined in environment") + raise KeyError("XNAT_PASS not defined in environment") from None return (username, password) @@ -143,7 +171,7 @@ def get_connection(config, site=None, url=None, auth=None, server_cache=None): server_url = get_server(url=url) if auth: - connection = xnat(server_url, auth[0], auth[1]) + connection = XNAT(server_url, auth[0], auth[1]) else: try: auth_file = config.get_key("XnatCredentials", site=site) @@ -155,7 +183,7 @@ def get_connection(config, site=None, url=None, auth=None, server_cache=None): # User probably provided metadata file name only auth_file = os.path.join(config.get_path("meta"), auth_file) username, password = get_auth(file_path=auth_file) - connection = xnat(server_url, username, password) + connection = XNAT(server_url, username, password) if server_cache is not None: server_cache[url] = connection @@ -163,7 +191,11 @@ def get_connection(config, site=None, url=None, auth=None, server_cache=None): return connection -class xnat(object): +# pylint: disable-next=too-many-public-methods +class XNAT: + """Manage a connection to an XNAT server. + """ + server = None auth = None headers = None @@ -178,12 +210,13 @@ def __init__(self, server, username, password): self.open_session() except Exception as e: raise XnatException( - f"Failed to open session with server {server}. Reason - {e}") + f"Failed to open session with server {server}. 
Reason - {e}" + ) from e def __enter__(self): return self - def __exit__(self, type, value, traceback): + def __exit__(self, *args): # Ends the session on the server side url = f"{self.server}/data/JSESSION" self.session.delete(url) @@ -197,10 +230,10 @@ def open_session(self): response = s.post(url, auth=self.auth) - if not response.status_code == requests.codes.ok: + if response.status_code != 200: logger.warning(f"Failed connecting to xnat server {self.server} " f"with response code {response.status_code}") - logger.debug("Username: {}") + logger.debug(f"Username: {self.auth[0]}") response.raise_for_status() # If password is expired, XNAT returns status 200 and a sea of @@ -242,9 +275,10 @@ def get_projects(self, project=""): try: result = self._make_xnat_query(url) - except Exception: + except Exception as e: raise XnatException( - f"Failed getting projects from server with search URL {url}") + f"Failed getting projects from server with search URL {url}" + ) from e if not result: logger.debug(f"No projects found on server {self.server}") @@ -264,8 +298,8 @@ def find_project(self, subject_id, projects=None): the search to. Defaults to None. Returns: - str: The name of the XNAT project the subject belongs to. Note: - if the same ID is found in more than one project only the + str or None: The name of the XNAT project the subject belongs to. + Note: if the same ID is found in more than one project only the first match is returned. """ if not projects: @@ -280,6 +314,7 @@ def find_project(self, subject_id, projects=None): logger.debug( f"Found session {subject_id} in project {project}") return project + return None def get_subject_ids(self, project): """Retrieve the IDs for all subjects within an XNAT project. 
@@ -303,8 +338,9 @@ def get_subject_ids(self, project): try: result = self._make_xnat_query(url) - except Exception: - raise XnatException(f"Failed getting xnat subjects with URL {url}") + except Exception as e: + raise XnatException(f"Failed getting xnat subjects with URL {url}" + ) from e if not result: return [] @@ -312,7 +348,8 @@ def get_subject_ids(self, project): try: subids = [item["label"] for item in result["ResultSet"]["Result"]] except KeyError as e: - raise XnatException(f"get_subject_ids - Malformed response. {e}") + raise XnatException(f"get_subject_ids - Malformed response. {e}" + ) from None return subids @@ -340,9 +377,10 @@ def get_subject(self, project, subject_id, create=False): try: result = self._make_xnat_query(url) - except Exception: + except Exception as e: raise XnatException( - f"Failed getting subject {subject_id} with URL {url}") + f"Failed getting subject {subject_id} with URL {url}" + ) from e if not create and not result: raise XnatException( @@ -355,9 +393,9 @@ def get_subject(self, project, subject_id, create=False): try: subject_json = result["items"][0] - except (IndexError, KeyError): + except (IndexError, KeyError) as e: raise XnatException( - f"Could not access metadata for subject {subject_id}") + f"Could not access metadata for subject {subject_id}") from e return XNATSubject(subject_json) @@ -378,7 +416,7 @@ def make_subject(self, project, subject): except requests.exceptions.RequestException as e: raise XnatException( f"Failed to create xnat subject {subject} in project " - f"{project}. Reason - {e}") + f"{project}. Reason - {e}") from e def find_subject(self, project, exper_id): """Find the parent subject ID for an experiment. 
@@ -398,9 +436,9 @@ def find_subject(self, project, exper_id): try: result = self._make_xnat_query(url) - except Exception: - XnatException(f"Failed to query XNAT server {project} for " - f"experiment {exper_id}") + except Exception as e: + raise XnatException(f"Failed to query XNAT server {project} for " + f"experiment {exper_id}") from e return result["items"][0]["data_fields"]["subject_ID"] def get_experiment_ids(self, project, subject=""): @@ -429,16 +467,17 @@ def get_experiment_ids(self, project, subject=""): try: result = self._make_xnat_query(url) - except Exception: + except Exception as e: raise XnatException( f"Failed getting experiment IDs for subject {subject}" - f" with URL {url}") + f" with URL {url}") from e if not result: return [] return [item.get("label") for item in result["ResultSet"]["Result"]] + # pylint: disable-next=too-many-arguments,too-many-positional-arguments def get_experiment(self, project, subject_id=None, exper_id=None, create=False, ident=None): """Get an experiment from the XNAT server. 
@@ -483,8 +522,9 @@ def get_experiment(self, project, subject_id=None, exper_id=None, try: result = self._make_xnat_query(url) - except Exception: - raise XnatException(f"Failed getting experiment with URL {url}") + except Exception as e: + raise XnatException(f"Failed getting experiment with URL {url}" + ) from e if not create and not result: raise XnatException( @@ -499,9 +539,9 @@ def get_experiment(self, project, subject_id=None, exper_id=None, try: exper_json = result["items"][0] - except (IndexError, KeyError): + except (IndexError, KeyError) as e: raise XnatException( - f"Could not access metadata for experiment {exper_id}") + f"Could not access metadata for experiment {exper_id}") from e return XNATExperiment(project, subject_id, exper_json, ident=ident) @@ -525,7 +565,7 @@ def make_experiment(self, project, subject, experiment): except requests.exceptions.RequestException as e: raise XnatException( f"Failed to create XNAT experiment {experiment} under " - f"subject {subject} in project {project}. Reason - {e}") + f"subject {subject} in project {project}. Reason - {e}") from e def get_scan_ids(self, project, subject, experiment): """Retrieve all scan IDs for an XNAT experiment. @@ -553,10 +593,10 @@ def get_scan_ids(self, project, subject, experiment): try: result = self._make_xnat_query(url) - except Exception: + except Exception as e: raise XnatException( f"Failed getting scan IDs for experiment {experiment} with " - f"URL {url}") + f"URL {url}") from e if not result: return [] @@ -566,59 +606,16 @@ def get_scan_ids(self, project, subject, experiment): item.get("ID") for item in result["ResultSet"]["Result"] ] except KeyError as e: - raise XnatException(f"get_scan_ids - Malformed response. {e}") + raise XnatException(f"get_scan_ids - Malformed response. {e}" + ) from None return scan_ids - def get_scan(self, project, subject_id, exper_id, scan_id): - """Get a scan from the XNAT server. - - Args: - project (:obj:`str`): The XNAT project to search within. 
- subject_id (:obj:`str`): The XNAT subject to search. - exper_id (:obj:`str`): The XNAT experiment to search. - scan_id (:obj:`str`): The ID of the scan to retrieve. - - Raises: - XnatException: If the scan does not exist or the server/API can't - be accessed. - - Returns: - :obj:`datman.xnat.XNATScan`: An XNATScan instance matching the - scan ID from the given experiment. - """ - logger.debug( - f"Querying XNAT server {self.server} for scan {scan_id} in " - f"experiment {exper_id} belonging to subject {subject_id} in " - f"project {project}") - - url = ( - f"{self.server}/data/archive/projects/{project}/subject_ids/" - f"{subject_id}/exper_ids/{exper_id}/scans/{scan_id}/?format=json") - - try: - result = self._make_xnat_query(url) - except Exception: - raise XnatException(f"Failed getting scan with URL {url}") - - if not result: - raise XnatException( - f"Scan {scan_id} does not exist for experiment {exper_id} " - f"in project {project}") - - try: - scan_json = result["items"][0] - except (IndexError, KeyError): - raise XnatException( - f"Could not access metadata for scan {scan_id}") - - return XNATScan(project, subject_id, exper_id, scan_json) - def get_resource_ids(self, study, session, experiment, - folderName=None, + folder_name=None, create=True): """ Return a list of resource id's (subfolders) from an experiment @@ -629,8 +626,9 @@ def get_resource_ids(self, "/resources/?format=json") try: result = self._make_xnat_query(url) - except Exception: - raise XnatException(f"Failed getting resource ids with url: {url}") + except Exception as e: + raise XnatException(f"Failed getting resource ids with url: {url}" + ) from e if result is None: raise XnatException( f"Experiment: {experiment} not found for session: {session}" @@ -638,27 +636,26 @@ def get_resource_ids(self, if create and int(result["ResultSet"]["totalRecords"]) < 1: return self.create_resource_folder(study, session, experiment, - folderName) + folder_name) resource_ids = {} for r in 
result["ResultSet"]["Result"]: label = r.get("label", "No Label") resource_ids[label] = r["xnat_abstractresource_id"] - if not folderName: + if not folder_name: # foldername not specified return them all - resource_id = [val for val in resource_ids.values()] + resource_id = list(resource_ids.values()) else: # check if folder exists, if not create it try: - resource_id = resource_ids[folderName] + resource_id = resource_ids[folder_name] except KeyError: # folder doesn't exist, create it if not create: return None - else: - resource_id = self.create_resource_folder( - study, session, experiment, folderName) + resource_id = self.create_resource_folder( + study, session, experiment, folder_name) return resource_id @@ -681,8 +678,9 @@ def get_resource_list(self, study, session, experiment, resource_id): f"/resources/{resource_id}/?format=xml") try: result = self._make_xnat_xml_query(url) - except Exception: - raise XnatException(f"Failed getting resources with url: {url}") + except Exception as e: + raise XnatException(f"Failed getting resources with url: {url}" + ) from e if result is None: raise XnatException( f"Experiment: {experiment} not found for session: {session}" @@ -711,23 +709,23 @@ def put_dicoms(self, project, subject, experiment, filename, retries=3): try: with open(filename, "rb") as data: - self._make_xnat_post(upload_url, data, retries, headers) + self.make_xnat_post(upload_url, data, retries, headers) except XnatException as e: e.study = project e.session = experiment raise e + except requests.exceptions.RequestException as e: + err = XnatException(f"Error uploading data with url: {upload_url}") + err.study = project + err.session = experiment + raise err from e except IOError as e: logger.error( f"Failed to open file: {filename} with excuse: {e.strerror}") err = XnatException(f"Error in file: {filename}") err.study = project err.session = experiment - raise err - except requests.exceptions.RequestException: - err = XnatException(f"Error uploading data 
with url: {upload_url}") - err.study = project - err.session = experiment - raise err + raise err from e def get_dicom(self, project, @@ -751,34 +749,32 @@ def get_dicom(self, os.close(filename[0]) filename = filename[1] try: - self._get_xnat_stream(url, filename, retries) + self.get_xnat_stream(url, filename, retries) return filename - except Exception: + except Exception as e: try: os.remove(filename) - except OSError as e: + except OSError as exc: logger.warning(f"Failed to delete tempfile: {filename} with " - f"excuse: {str(e)}") + f"excuse: {str(exc)}") err = XnatException(f"Failed getting dicom with url: {url}") err.study = project err.session = session - raise err + raise err from e - def put_resource(self, - project, - subject, - experiment, - filename, - data, - folder, - retries=3): - """ - POST a resource file to the xnat server + def put_resource(self, project, subject, experiment, filename, data, + folder): + """Upload a resource file to the XNAT server. Args: - filename: string to store filename as - data: string containing data - (such as produced by zipfile.ZipFile.read()) + project (:obj:`str`): the project to upload to. + subject (:obj:`str`): The subject ID to upload to. + experiment (:obj:`str`): the experiment ID to upload to. + filename (:obj:`str`): The absolute path to a file to upload + data (bytes): Bytes as produced from reading a file with + ZipFile.read + folder (:obj:`str`): The folder name to deposit the file in on + XNAT. 
""" @@ -793,7 +789,7 @@ def put_resource(self, resource_id = self.get_resource_ids(project, subject, experiment, - folderName=folder) + folder_name=folder) uploadname = urllib.parse.quote(filename) @@ -803,17 +799,18 @@ def put_resource(self, f"files/{uploadname}?inbody=true") try: - self._make_xnat_post(attach_url, data) + self.make_xnat_post(attach_url, data) except XnatException as err: err.study = project err.session = experiment raise err - except Exception: + except Exception as e: logger.warning( f"Failed adding resource to xnat with url: {attach_url}") err = XnatException("Failed adding resource to xnat") err.study = project err.session = experiment + raise err from e def get_resource( self, @@ -844,16 +841,17 @@ def get_resource( os.close(filename[0]) filename = filename[1] try: - self._get_xnat_stream(url, filename, retries) + self.get_xnat_stream(url, filename, retries) return filename - except Exception: + except Exception as e: try: os.remove(filename) - except OSError as e: + except OSError as exc: logger.warning(f"Failed to delete tempfile: {filename} with " - f"exclude: {str(e)}") + f"exclude: {str(exc)}") logger.error("Failed getting resource from xnat", exc_info=True) - raise XnatException(f"Failed downloading resource with url: {url}") + raise XnatException(f"Failed downloading resource with url: {url}" + ) from e def get_resource_archive( self, @@ -879,36 +877,30 @@ def get_resource_archive( os.close(filename[0]) filename = filename[1] try: - self._get_xnat_stream(url, filename, retries) + self.get_xnat_stream(url, filename, retries) return filename - except Exception: + except Exception as e: try: os.remove(filename) - except OSError as e: + except OSError as exc: logger.warning(f"Failed to delete tempfile: {filename} with " - f"error: {str(e)}") + f"error: {str(exc)}") logger.error("Failed getting resource archive from xnat", exc_info=True) raise XnatException( - f"Failed downloading resource archive with url: {url}") + f"Failed downloading 
resource archive with url: {url}") from e - def delete_resource( - self, - project, - session, - experiment, - resource_group_id, - resource_id, - retries=3, - ): + def delete_resource(self, project, session, experiment, resource_group_id, + resource_id): """Delete a resource file from xnat""" url = (f"{self.server}/data/archive/projects/{project}/" f"subjects/{session}/experiments/{experiment}/" f"resources/{resource_group_id}/files/{resource_id}") try: self._make_xnat_delete(url) - except Exception: - raise XnatException(f"Failed deleting resource with url: {url}") + except Exception as e: + raise XnatException(f"Failed deleting resource with url: {url}" + ) from e def rename_subject(self, project, old_name, new_name, rename_exp=False): """Change a subjects's name on XNAT. @@ -943,8 +935,8 @@ def rename_subject(self, project, old_name, new_name, rename_exp=False): except requests.HTTPError as e: if e.response.status_code == 409: raise XnatException(f"Can't rename {old_name} to {new_name}." - "Subject already exists") - elif e.response.status_code == 422: + "Subject already exists") from None + if e.response.status_code == 422: # This is raised every time a subject is renamed. pass else: @@ -953,8 +945,6 @@ def rename_subject(self, project, old_name, new_name, rename_exp=False): if rename_exp: self.rename_experiment(project, new_name, old_name, new_name) - return - def rename_experiment(self, project, subject, old_name, new_name): """Change an experiment's name on XNAT. 
@@ -1036,9 +1026,8 @@ def share_subject(self, source_project, source_sub, dest_project, if e.response.status_code == 409: raise XnatException( f"Can't share {source_sub} as {dest_sub}, subject " - "ID already exists.") - else: - raise e + "ID already exists.") from None + raise e def share_experiment(self, source_project, source_sub, source_exp, dest_project, dest_exp): @@ -1077,9 +1066,8 @@ def share_experiment(self, source_project, source_sub, source_exp, except requests.HTTPError as e: if e.response.status_code == 409: raise XnatException(f"Can't share {source_exp} as {dest_exp}" - " experiment ID already exists") - else: - raise e + " experiment ID already exists") from None + raise e def dismiss_autorun(self, experiment): """Mark the AutoRun.xml pipeline as finished. @@ -1099,37 +1087,39 @@ def dismiss_autorun(self, experiment): "?wrk:workflowData/status=Complete") self._make_xnat_put(dismiss_url) - def _get_xnat_stream(self, url, filename, retries=3, timeout=300): + def get_xnat_stream(self, url, filename, retries=3, timeout=300): + """Get large objects from XNAT in a stream. 
+ """ logger.debug(f"Getting {url} from XNAT") try: response = self.session.get(url, stream=True, timeout=timeout) except requests.exceptions.Timeout as e: if retries > 0: - return self._get_xnat_stream(url, - filename, - retries=retries - 1, - timeout=timeout * 2) - else: - raise e + return self.get_xnat_stream(url, + filename, + retries=retries - 1, + timeout=timeout * 2) + raise e if response.status_code == 401: logger.info("Session may have expired, resetting") self.open_session() - return self._get_xnat_stream( + return self.get_xnat_stream( url, filename, retries=retries, timeout=timeout) if response.status_code == 404: logger.info( f"No records returned from xnat server for query: {url}") - return - elif response.status_code == 504: + return None + + if response.status_code == 504: if retries: logger.warning("xnat server timed out, retrying") time.sleep(30) - self._get_xnat_stream(url, - filename, - retries=retries - 1, - timeout=timeout * 2) + self.get_xnat_stream(url, + filename, + retries=retries - 1, + timeout=timeout * 2) else: logger.error("xnat server timed out, giving up") response.raise_for_status() @@ -1143,10 +1133,11 @@ def _get_xnat_stream(self, url, filename, retries=3, timeout=300): f.write(chunk) except requests.exceptions.RequestException as e: logger.error("Failed reading from xnat") - raise (e) + raise e except IOError as e: logger.error("Failed writing to file") - raise (e) + raise e + return None def _make_xnat_query(self, url, retries=3, timeout=150): try: @@ -1156,9 +1147,8 @@ def _make_xnat_query(self, url, retries=3, timeout=150): return self._make_xnat_query( url, retries=retries - 1, timeout=timeout * 2 ) - else: - logger.error(f"Xnat server timed out getting url {url}") - raise e + logger.error(f"Xnat server timed out getting url {url}") + raise e if response.status_code == 401: # possibly the session has timed out @@ -1169,12 +1159,14 @@ def _make_xnat_query(self, url, retries=3, timeout=150): if response.status_code == 404: 
logger.info( f"No records returned from xnat server for query: {url}") - return - elif not response.status_code == requests.codes.ok: + return None + + if response.status_code != 200: logger.error(f"Failed connecting to xnat server {self.server} " f"with response code {response.status_code}") logger.debug("Username: {}") response.raise_for_status() + return response.json() def _make_xnat_xml_query(self, url, retries=3): @@ -1183,8 +1175,7 @@ def _make_xnat_xml_query(self, url, retries=3): except requests.exceptions.Timeout as e: if retries > 0: return self._make_xnat_xml_query(url, retries=retries - 1) - else: - raise e + raise e if response.status_code == 401: # possibly the session has timed out @@ -1194,19 +1185,22 @@ def _make_xnat_xml_query(self, url, retries=3): if response.status_code == 404: logger.info(f"No records returned from xnat server to query {url}") - return - elif not response.status_code == requests.codes.ok: + return None + if response.status_code != 200: logger.error(f"Failed connecting to xnat server {self.server}" f" with response code {response.status_code}") - logger.debug("Username: {}") + logger.debug(f"Username: {self.auth[0]}") response.raise_for_status() root = ElementTree.fromstring(response.content) return root def _make_xnat_put(self, url, retries=3): + """Modify XNAT contents. + """ if retries == 0: - logger.info(f"Timed out making xnat put {url}") - requests.exceptions.HTTPError() + raise requests.exceptions.HTTPError( + f"Timed out adding data to xnat {url}" + ) try: response = self.session.put(url, timeout=30) @@ -1224,8 +1218,11 @@ def _make_xnat_put(self, url, retries=3): f"http client error at folder creation: {response.status_code}" ) response.raise_for_status() + return None - def _make_xnat_post(self, url, data, retries=3, headers=None): + def make_xnat_post(self, url, data, retries=3, headers=None): + """Add data to XNAT. 
+ """ logger.debug(f"POSTing data to xnat, {retries} retries left") response = self.session.post(url, headers=headers, @@ -1244,7 +1241,7 @@ def _make_xnat_post(self, url, data, retries=3, headers=None): if retries: logger.warning("xnat server timed out, retrying") time.sleep(30) - self._make_xnat_post(url, data, retries=retries - 1) + self.make_xnat_post(url, data, retries=retries - 1) else: logger.warning("xnat server timed out, giving up") response.raise_for_status() @@ -1258,10 +1255,9 @@ def _make_xnat_post(self, url, data, retries=3, headers=None): if "Unable to identify experiment" in reply: raise XnatException("Unable to identify experiment, did " "dicom upload fail?") - else: - raise XnatException("An unknown error occured uploading data." - f"Status code: {response.status_code}, " - f"reason: {reply}") + raise XnatException("An unknown error occured uploading data." + f"Status code: {response.status_code}, " + f"reason: {reply}") return reply def _make_xnat_delete(self, url, retries=3): @@ -1280,6 +1276,7 @@ def _make_xnat_delete(self, url, retries=3): logger.warning( f"http client error deleting resource: {response.status_code}") response.raise_for_status() + return None def __str__(self): return f"" diff --git a/pyproject.toml b/pyproject.toml index 7ed546dd..4495cff4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,5 +116,12 @@ logging-format-style = "new" disable = [ "logging-format-interpolation", "too-many-lines", - "logging-fstring-interpolation" + "logging-fstring-interpolation", + "too-many-instance-attributes", + "too-many-arguments", + "too-many-positional-arguments", + "too-few-public-methods" ] + +[tool.pylint.format] +max-line-length = 79 \ No newline at end of file From f25590442f48e5d0414235e6fc8c2712830ca127 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Mon, 17 Mar 2025 16:39:57 -0400 Subject: [PATCH 11/45] [FIX] Stop repeated export for NiiLinkExporter --- datman/exporters.py | 6 ++++++ 1 file changed, 6 insertions(+) diff 
--git a/datman/exporters.py b/datman/exporters.py index 339d8b3d..0cf86d07 100644 --- a/datman/exporters.py +++ b/datman/exporters.py @@ -999,6 +999,9 @@ def export(self, *args, **kwargs): f"mapping {self.name_map}") return + if self.outputs_exist(): + return + self.make_output_dir() for dm_name, bids_name in self.name_map.items(): if bids_name == "missing": @@ -1407,6 +1410,9 @@ def _get_file(self, fname, ext): """ found = os.path.join(self.nii_path, fname + ext) if not os.path.exists(found): + bl_found = os.path.join(self.nii_path, 'blacklisted', fname + ext) + if os.path.exists(bl_found): + return bl_found logger.debug(f"File not found {found}") return None return found From 0a6d72ca583d031311f1e0751020b76c4fa222b4 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Mon, 17 Mar 2025 16:40:20 -0400 Subject: [PATCH 12/45] [FIX] Update timeout for uploading zip files --- datman/xnat.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/datman/xnat.py b/datman/xnat.py index 3cfdc7e1..dfae2e5f 100644 --- a/datman/xnat.py +++ b/datman/xnat.py @@ -699,7 +699,8 @@ def get_resource_list(self, study, session, experiment, resource_id): return items - def put_dicoms(self, project, subject, experiment, filename, retries=3): + def put_dicoms(self, project, subject, experiment, filename, retries=3, + timeout=7200): """Upload an archive of dicoms to XNAT filename: archive to upload""" headers = {"Content-Type": "application/zip"} @@ -711,7 +712,13 @@ def put_dicoms(self, project, subject, experiment, filename, retries=3): try: with open(filename, "rb") as data: - self._make_xnat_post(upload_url, data, retries, headers) + self._make_xnat_post(upload_url, data, retries=retries, + headers=headers, timeout=timeout) + except requests.exception.Timeout as e: + if retries == 1: + raise e + self.put_dicoms(project, subject, experiment, filename, + retries=retries-1, timeout=timeout+1200) except XnatException as e: e.study = project e.session = experiment @@ 
-1225,12 +1232,12 @@ def _make_xnat_put(self, url, retries=3): ) response.raise_for_status() - def _make_xnat_post(self, url, data, retries=3, headers=None): + def _make_xnat_post(self, url, data, retries=3, headers=None, timeout=3600): logger.debug(f"POSTing data to xnat, {retries} retries left") response = self.session.post(url, headers=headers, data=data, - timeout=60 * 60) + timeout=timeout) reply = str(response.content) From 169271b62d99d4aa2370d019dba6e1a8af500724 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Mon, 17 Mar 2025 17:18:09 -0400 Subject: [PATCH 13/45] [FIX] Attribute referenced before assignment --- datman/importers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datman/importers.py b/datman/importers.py index c6d0bb0f..680fafc9 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -983,11 +983,11 @@ def __init__(self, ident, zip_path): self.ident = ident self.name = zip_path self.path = zip_path - self.date = self.scans[0].date self.contents = self.parse_contents() self.scans = self.get_scans() self.resource_files = self.contents['resources'] self.dcm_subdir = os.path.split(self.scans[0].series_dir)[0] + self.date = self.scans[0].date @property def ident(self) -> 'datman.scanid.Identifier': From 4549a6eed1180830f10211bee4933b494cef917b Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Tue, 18 Mar 2025 12:24:59 -0400 Subject: [PATCH 14/45] [FIX] Fix ZipImporter string and let NiiLinkExporter remove broken symlinks --- datman/exporters.py | 10 ++++++++++ datman/importers.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/datman/exporters.py b/datman/exporters.py index 048e07bb..451873db 100644 --- a/datman/exporters.py +++ b/datman/exporters.py @@ -1061,6 +1061,16 @@ def make_link(self, dm_file, bids_file): for source in glob(bids_file + '*'): ext = get_extension(source) target = base_target + ext + + if os.path.islink(target) and not os.path.exists(target): + # Remove a broken symlink + try: + 
os.unlink(target) + except Exception as exc: + logger.error( + f"Failed to remove broken symlink {target} - {exc}") + continue + rel_source = get_relative_source(source, target) try: os.symlink(rel_source, target) diff --git a/datman/importers.py b/datman/importers.py index 680fafc9..f48954b2 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -1129,7 +1129,7 @@ def get_scans(self) -> list['ZipSeriesImporter']: return list(scans.values()) def __str__(self): - return f"" def __repr__(self): return self.__str__() From 276e9b33b8917ae7357290bd707d95b35f0e06e2 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Tue, 18 Mar 2025 13:21:19 -0400 Subject: [PATCH 15/45] [FIX] Ensure is_usable is consistent across SeriesImporters --- datman/importers.py | 70 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/datman/importers.py b/datman/importers.py index f48954b2..3ba6498c 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -146,6 +146,15 @@ def subject(self) -> str: on XNAT). """ + @property + @abstractmethod + def experiment(self) -> str: + """The experiment ID of the session this scan belongs to. + + The experiment ID should be the 'full' ID of the session (i.e. with + all ID fields included). + """ + @property @abstractmethod def description(self) -> str: @@ -171,8 +180,8 @@ def uid(self) -> str: """ @abstractmethod - def is_usable(self) -> bool: - """Indicates whether the series contains usable dcm files. + def raw_dicoms_exist(self) -> bool: + """Indicates whether the series contains dicom files. """ @abstractmethod @@ -210,6 +219,37 @@ def is_derived(self) -> bool: return True return False + def is_usable(self, strict=False): + """Indicates whether the series contains usable dcm files. + + Args: + strict (bool, optional): If set, 'derived' scans will be marked + unusable. + """ + if not self.raw_dicoms_exist(): + logger.debug(f"Ignoring {self.series} for {self.experiment}. 
" + f"No RAW dicoms exist.") + return False + + if not self.description: + logger.error(f"Can't find description for series {self.series} " + f"from session {self.experiment}.") + return False + + if not strict: + return True + + if self.is_derived(): + logger.debug( + f"Series {self.series} in session {self.experiment} is a " + "derived scan. Ignoring.") + return False + + if not self.names: + return False + + return True + class XNATObject(ABC): """A meta class for classes that manage XNAT contents. @@ -683,6 +723,14 @@ def subject(self) -> str: def subject(self, value: str): self._subject = value + @property + def experiment(self) -> str: + return self._experiment + + @experiment.setter + def experiment(self, value: str): + self._experiment = value + @property def description(self) -> str: return self._description @@ -1109,7 +1157,7 @@ def get_scans(self) -> list['ZipSeriesImporter']: for sub_path, header in headers.items(): # .get_full_subjectid may need to be changed for compatibility zip_scan = ZipSeriesImporter( - self.ident.get_full_subjectid(), self.path, sub_path, + self.ident, self.path, sub_path, header, self.contents['scans'][sub_path] ) if zip_scan.series in scans: @@ -1140,8 +1188,10 @@ class ZipSeriesImporter(SeriesImporter): """ # pylint: disable-next=too-many-arguments,too-many-positional-arguments - def __init__(self, subject, zip_file, series_dir, header, zip_items): - self.subject = subject + def __init__(self, ident, zip_file, series_dir, header, zip_items): + self.ident = ident + self.subject = ident.get_full_subjectid() + self.experiment = ident.get_full_subjectid_with_timepoint_session() self.zip_file = zip_file self.series_dir = series_dir self.header = header @@ -1179,6 +1229,14 @@ def subject(self) -> str: def subject(self, value: str): self._subject = value + @property + def experiment(self) -> str: + return self._experiment + + @experiment.setter + def experiment(self, value: str): + self._experiment = value + @property def 
description(self) -> str: return self._description @@ -1215,7 +1273,7 @@ def uid(self) -> list[str]: def uid(self, value: list[str]): self._uid = value - def is_usable(self): + def raw_dicoms_exist(self) -> bool: return any(item.endswith(".dcm") for item in self.contents) def get_files(self, dest_dir: str, *args, **kwargs): From ac398d859d14bebe2dbc8132bd5e7383cd3fda12 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Wed, 19 Mar 2025 20:19:04 -0400 Subject: [PATCH 16/45] [FIX] Added required additional attributes --- datman/importers.py | 56 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/datman/importers.py b/datman/importers.py index 3ba6498c..41d25c6c 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -173,12 +173,26 @@ def image_type(self) -> str: """The ImageType from the dicom headers. """ + @property + @abstractmethod + def type(self) -> str: + """The 'XnatType' or an equivalent (usually equals series description). + """ + @property @abstractmethod def uid(self) -> str: """The UID from the dicom headers. """ + @property + @abstractmethod + def echo_dict(self) -> dict: + """A dictionary mapping each echo to its intended output name. + + This will remain an empty dict if the scan is not multi echo. + """ + @abstractmethod def raw_dicoms_exist(self) -> bool: """Indicates whether the series contains dicom files. @@ -200,6 +214,11 @@ def set_tag(self, tag_map): """Set the scan tag for the scan. """ + @abstractmethod + def is_multiecho(self) -> bool: + """Check if the series is multiecho. + """ + def _mangle_descr(self) -> str: """Modify a series description to remove non-alphanumeric characters. 
""" @@ -685,7 +704,7 @@ def __init__(self, experiment, scan_json): self.image_type = self.get_field("parameters/imageType") self.multiecho = self.is_multiecho() self.description = self._set_description() - self.type = self.get_field("type") + self._type = self.get_field("type") self.names = [] self.echo_dict = {} # Will remain empty unless scan is multi-echo self.tags = [] @@ -763,13 +782,25 @@ def uid(self) -> list[str]: def uid(self, value: list[str]): self._uid = value + @property + def type(self) -> str: + return self._type + + @property + def echo_dict(self) -> dict: + return self._echo_dict + + @echo_dict.setter + def echo_dict(self, value): + self._echo_dict = value + def _set_description(self): series_descr = self.get_field("series_description") if series_descr: return series_descr return self.get_field("type") - def is_multiecho(self): + def is_multiecho(self) -> bool: """Check if the series is multiecho. """ try: @@ -1204,6 +1235,7 @@ def __init__(self, ident, zip_file, series_dir, header, zip_items): self.names = [] self.tags = [] self.dcm_dir = None + self.echo_dict = {} @property def dcm_dir(self) -> str: @@ -1273,6 +1305,26 @@ def uid(self) -> list[str]: def uid(self, value: list[str]): self._uid = value + @property + def type(self) -> str: + return self.description + + @property + def echo_dict(self) -> dict: + return self._echo_dict + + @echo_dict.setter + def echo_dict(self, value): + self._echo_dict = value + + def is_multiecho(self) -> bool: + """Check if the series is multiecho. + + This can't be determined without the configuration files so will + be False until set_datman_name() has been called at least once. 
+ """ + return self.echo_dict + def raw_dicoms_exist(self) -> bool: return any(item.endswith(".dcm") for item in self.contents) From cb1ea630ec37c829b8095b285a2b628a11b60701 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Thu, 20 Mar 2025 16:48:11 -0400 Subject: [PATCH 17/45] [FIX] typo when referencing timeout exception --- datman/xnat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datman/xnat.py b/datman/xnat.py index e84d5a38..ad2f995a 100644 --- a/datman/xnat.py +++ b/datman/xnat.py @@ -712,7 +712,7 @@ def put_dicoms(self, project, subject, experiment, filename, retries=3, with open(filename, "rb") as data: self.make_xnat_post(upload_url, data, retries=retries, headers=headers, timeout=timeout) - except requests.exception.Timeout as e: + except requests.exceptions.Timeout as e: if retries == 1: raise e self.put_dicoms(project, subject, experiment, filename, From d5c7744822cbccf91472c368b94c1835a1b93ea6 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Mon, 24 Mar 2025 17:43:43 -0400 Subject: [PATCH 18/45] [FIX] Greatly increase timeout value, update upload options --- datman/xnat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datman/xnat.py b/datman/xnat.py index ad2f995a..bb7261a2 100644 --- a/datman/xnat.py +++ b/datman/xnat.py @@ -698,7 +698,7 @@ def get_resource_list(self, study, session, experiment, resource_id): return items def put_dicoms(self, project, subject, experiment, filename, retries=3, - timeout=7200): + timeout=86400): """Upload an archive of dicoms to XNAT filename: archive to upload""" headers = {"Content-Type": "application/zip"} @@ -706,7 +706,7 @@ def put_dicoms(self, project, subject, experiment, filename, retries=3, upload_url = ( f"{self.server}/data/services/import?project={project}" f"&subject={subject}&session={experiment}&overwrite=delete" - "&prearchive=false&inbody=true") + "&prearchive=false&Ignore-Unparsable=true&inbody=true") try: with open(filename, "rb") as data: From 
dcd98e14784ede89b11c4db753626ecdc742b677 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Mon, 24 Mar 2025 17:46:56 -0400 Subject: [PATCH 19/45] [FIX] Update function reference --- bin/dm_xnat_upload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/dm_xnat_upload.py b/bin/dm_xnat_upload.py index ef3c6686..e0f7264c 100755 --- a/bin/dm_xnat_upload.py +++ b/bin/dm_xnat_upload.py @@ -321,7 +321,7 @@ def check_files_exist(archive, xnat_experiment, xnat): logger.error("Failed getting zip file headers for: {}".format(archive)) return False, False - xnat_resources = xnat_experiment.get_resources(xnat) + xnat_resources = xnat_experiment.get_resource_uris(xnat) if not local_headers: resources_exist = resource_data_exists(xnat_resources, archive) From 0edbcaccc78d8f27bf9f6276edb4832a6d145f67 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Mon, 24 Mar 2025 19:15:09 -0400 Subject: [PATCH 20/45] [FIX] Correct errors with zip parsing and alternate file types --- datman/importers.py | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/datman/importers.py b/datman/importers.py index 41d25c6c..295312be 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -1166,19 +1166,25 @@ def parse_contents(self) -> dict: 'resources': [] } with ZipFile(self.path, "r") as fh: - par_dir = fh.filelist[0].filename.strip('/') - for item in fh.filelist[1:]: + for item in fh.filelist: if item.is_dir(): - contents['scans'].setdefault(item.filename.strip('/'), []) - else: + continue + + if self.is_scan(item.filename): folder, _ = os.path.split(item.filename) - if folder == par_dir: - contents['resources'].append(item.filename) - else: - contents['scans'].setdefault(folder, []).append( - item.filename) + contents['scans'].setdefault(folder, []).append( + item.filename) + else: + contents['resources'].append(item.filename) return contents + def is_scan(self, fname): + if fname.endswith(".dcm"): + return True + if 
fname.endswith(".IMA"): + return True + return False + def get_scans(self) -> list['ZipSeriesImporter']: """Get ZipSeriesImporters for each scan in the session. """ @@ -1186,11 +1192,15 @@ def get_scans(self) -> list['ZipSeriesImporter']: scans = {} duplicate_series = set() for sub_path, header in headers.items(): - # .get_full_subjectid may need to be changed for compatibility - zip_scan = ZipSeriesImporter( - self.ident, self.path, sub_path, - header, self.contents['scans'][sub_path] - ) + try: + zip_scan = ZipSeriesImporter( + self.ident, self.path, sub_path, + header, self.contents['scans'][sub_path] + ) + except KeyError: + logger.error(f"Subdirectory {sub_path} not found in contents for {self.path}.") + continue + if zip_scan.series in scans: duplicate_series.add(zip_scan.series) else: From b4006dcdc6a0cf5617561db7e1024ece193934c8 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Wed, 26 Mar 2025 20:00:48 -0400 Subject: [PATCH 21/45] [FIX] Allow zip parsing when dicoms have no extension --- datman/importers.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/datman/importers.py b/datman/importers.py index 295312be..fa5980de 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -13,6 +13,7 @@ import os import re import shutil +from pathlib import Path from zipfile import ZipFile, BadZipFile from datman.exceptions import ParseException, XnatException @@ -1179,9 +1180,21 @@ def parse_contents(self) -> dict: return contents def is_scan(self, fname): - if fname.endswith(".dcm"): + item = Path(fname) + ext = item.suffix + if ext == ".dcm": return True - if fname.endswith(".IMA"): + if ext.upper() == ".IMA": + return True + if (item.parent.name.upper() == "DICOM" or + item.parent.name.upper() == "SECONDARY"): + # Some zip files label their folders 'dicom' but the files + # themself have no extension and are labelled by UID, in which + # case 'ext' will look like a floating point number + try: + float(ext) + except 
ValueError: + return False return True return False From ef8724e3e334bef2c7542e0a47f644ee1d168aa8 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 25 Apr 2025 12:18:50 -0400 Subject: [PATCH 22/45] [FIX] Update scan importer repr + bids inventory to catch error files Error files needed the session number to be match-able to the right scan session + bids inventory needs to include error files to ensure blacklisting removal etc. can correctly handle them. --- datman/importers.py | 10 ++++++++-- datman/scan.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/datman/importers.py b/datman/importers.py index fa5980de..3e9d6d1d 100644 --- a/datman/importers.py +++ b/datman/importers.py @@ -270,6 +270,12 @@ def is_usable(self, strict=False): return True + @property + def str_repr(self): + """Provide a consistent repr for all subclasses + """ + return f"{self.experiment} - {self.series}" + class XNATObject(ABC): """A meta class for classes that manage XNAT contents. 
@@ -1049,7 +1055,7 @@ def _fix_download_name(self, output_dir): return def __str__(self): - return f"" + return f"" def __repr__(self): return self.__str__() @@ -1396,7 +1402,7 @@ def set_tag(self, tag_map): return {} def __str__(self): - return f"" + return f"" def __repr__(self): return self.__str__() diff --git a/datman/scan.py b/datman/scan.py index bc8812bd..9a6c1f39 100644 --- a/datman/scan.py +++ b/datman/scan.py @@ -9,9 +9,14 @@ """ import glob import os +import re +import logging import datman.scanid as scanid import datman.utils +from datman.exceptions import ParseException + +logger = logging.getLogger(__name__) class DatmanNamed(object): @@ -197,6 +202,13 @@ def _make_bids_inventory(self): continue for item in files: + if item.endswith(".err"): + err_file = os.path.join(path, item) + ident, series = self._parse_err_file(err_file) + if ident and ident.session == self.session: + inventory.setdefault(series, []).append(err_file) + continue + if not item.endswith(".json"): continue @@ -220,6 +232,25 @@ def _make_bids_inventory(self): return inventory + def _parse_err_file(self, fname): + with open(fname, "r") as fh: + lines = fh.readlines() + + regex = ".*<.*Importer (.*) - ([0-9]+)>*" + match = re.match(regex, lines[0]) + if not match: + logger.error(f"Can't parse error file - {fname}") + return None, None + + subid, series = match.groups() + try: + ident = datman.scan.parse(subid) + except ParseException: + logger.error(f"Unparseable ID found in error file - {subid}") + return None, series + + return ident, series + def get_tagged_nii(self, tag): try: matched_niftis = self.__nii_dict[tag] From 81bee314423264105b7f810967ab7be9de3278c3 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 25 Apr 2025 13:51:44 -0400 Subject: [PATCH 23/45] [FIX] Update references to datman.scanid At somepoint in the past I changed the import to scanid but didnt fully update all references so I just switched it back to avoid name reference errors --- datman/scan.py | 11 
+++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/datman/scan.py b/datman/scan.py index 9a6c1f39..a6b505c1 100644 --- a/datman/scan.py +++ b/datman/scan.py @@ -12,9 +12,8 @@ import re import logging -import datman.scanid as scanid +import datman.scanid import datman.utils -from datman.exceptions import ParseException logger = logging.getLogger(__name__) @@ -60,7 +59,7 @@ def __init__(self, path): path_minus_ext = path.replace(self.ext, "") try: - ident, tag, series, description = scanid.parse_filename( + ident, tag, series, description = datman.scanid.parse_filename( path_minus_ext) except datman.scanid.ParseException: # re-raise the exception with a more descriptive message @@ -148,7 +147,7 @@ def niftis(self): def _get_ident(self, subid): subject_id = self.__check_session(subid) try: - ident = scanid.parse(subject_id) + ident = datman.scanid.parse(subject_id) except datman.scanid.ParseException: raise datman.scanid.ParseException( f"{subject_id} does not match datman convention") @@ -245,7 +244,7 @@ def _parse_err_file(self, fname): subid, series = match.groups() try: ident = datman.scan.parse(subid) - except ParseException: + except datman.scanid.ParseException: logger.error(f"Unparseable ID found in error file - {subid}") return None, series @@ -260,7 +259,7 @@ def get_tagged_nii(self, tag): def get_resource_dir(self, session): for resource_dir in self.resources: - ident = scanid.parse(os.path.basename(resource_dir)) + ident = datman.scanid.parse(os.path.basename(resource_dir)) if int(ident.session) != int(session): continue if os.path.exists(resource_dir): From 6f9f270d16452cc4dd0d0dad742edf52c4a958e0 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 25 Apr 2025 14:00:58 -0400 Subject: [PATCH 24/45] [FIX] Ensure series number is always integer --- datman/scan.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datman/scan.py b/datman/scan.py index a6b505c1..16f3b5da 100644 --- a/datman/scan.py +++ b/datman/scan.py @@ -242,6 +242,8 
@@ def _parse_err_file(self, fname): return None, None subid, series = match.groups() + series = int(series) + try: ident = datman.scan.parse(subid) except datman.scanid.ParseException: From 6ff603cba0d996203072f202fb1d50a21ed96ee2 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 25 Apr 2025 14:06:05 -0400 Subject: [PATCH 25/45] [FIX] Fix type in reference to parse function --- datman/scan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datman/scan.py b/datman/scan.py index 16f3b5da..fa555d58 100644 --- a/datman/scan.py +++ b/datman/scan.py @@ -245,7 +245,7 @@ def _parse_err_file(self, fname): series = int(series) try: - ident = datman.scan.parse(subid) + ident = datman.scanid.parse(subid) except datman.scanid.ParseException: logger.error(f"Unparseable ID found in error file - {subid}") return None, series From 20902218662743548ee001a57a41e06c9c6ede61 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 25 Apr 2025 22:12:08 -0400 Subject: [PATCH 26/45] [FIX] Move .err parser to utils, catch blacklisted err files --- datman/exporters.py | 13 ++++++++++--- datman/scan.py | 27 +-------------------------- datman/utils.py | 31 +++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 29 deletions(-) diff --git a/datman/exporters.py b/datman/exporters.py index 451873db..8f80b175 100644 --- a/datman/exporters.py +++ b/datman/exporters.py @@ -29,7 +29,8 @@ make_filename, KCNIIdentifier) from datman.utils import (run, make_temp_directory, get_extension, filter_niftis, find_tech_notes, read_blacklist, - get_relative_source, read_json, write_json) + get_relative_source, read_json, write_json, + parse_err_file) try: @@ -245,8 +246,14 @@ def check_contents(self, expected, actual): ) if os.path.exists(err_file): continue - else: - missing.setdefault(scan, []).append(out_name) + + blacklisted_err = os.path.join( + self.output_dir, "blacklisted", + os.path.basename(out_name) + "_niix.err") + if os.path.exists(blacklisted_err): + continue + + 
missing.setdefault(scan, []).append(out_name) continue # Ignore split series, we can't handle these right now. diff --git a/datman/scan.py b/datman/scan.py index fa555d58..3f48ad05 100644 --- a/datman/scan.py +++ b/datman/scan.py @@ -9,14 +9,10 @@ """ import glob import os -import re -import logging import datman.scanid import datman.utils -logger = logging.getLogger(__name__) - class DatmanNamed(object): """ @@ -203,7 +199,7 @@ def _make_bids_inventory(self): for item in files: if item.endswith(".err"): err_file = os.path.join(path, item) - ident, series = self._parse_err_file(err_file) + ident, series = datman.utils._parse_err_file(err_file) if ident and ident.session == self.session: inventory.setdefault(series, []).append(err_file) continue @@ -231,27 +227,6 @@ def _make_bids_inventory(self): return inventory - def _parse_err_file(self, fname): - with open(fname, "r") as fh: - lines = fh.readlines() - - regex = ".*<.*Importer (.*) - ([0-9]+)>*" - match = re.match(regex, lines[0]) - if not match: - logger.error(f"Can't parse error file - {fname}") - return None, None - - subid, series = match.groups() - series = int(series) - - try: - ident = datman.scanid.parse(subid) - except datman.scanid.ParseException: - logger.error(f"Unparseable ID found in error file - {subid}") - return None, series - - return ident, series - def get_tagged_nii(self, tag): try: matched_niftis = self.__nii_dict[tag] diff --git a/datman/utils.py b/datman/utils.py index 7d225b86..45795747 100644 --- a/datman/utils.py +++ b/datman/utils.py @@ -1383,3 +1383,34 @@ def read_json(path): def write_json(path, contents): with open(path, "w") as fh: json.dump(contents, fh, indent=4) + +def parse_err_file(fname): + """Parse an error file that was generated during extraction. + + Args: + fname (:obj:`str`): The full path to an error file. 
+ + Returns: + tuple: A tuple of a datman identifier (or None, if a valid ID does + not exist in the error file) and an integer series number (for + the series that failed to extract). + """ + with open(fname, "r") as fh: + lines = fh.readlines() + + regex = ".*<.*Importer (.*) - ([0-9]+)>*" + match = re.match(regex, lines[0]) + if not match: + logger.error(f"Can't parse error file - {fname}") + return None, None + + subid, series = match.groups() + series = int(series) + + try: + ident = scanid.parse(subid) + except scanid.ParseException: + logger.error(f"Unparseable ID found in error file - {subid}") + return None, series + + return ident, series \ No newline at end of file From 23fae5ef90146318e5fd944c086e419ef80abe4d Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 25 Apr 2025 22:12:08 -0400 Subject: [PATCH 27/45] [FIX] Move .err parser to utils, catch blacklisted err files --- datman/exporters.py | 13 ++++++++++--- datman/scan.py | 27 +-------------------------- datman/utils.py | 31 +++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 29 deletions(-) diff --git a/datman/exporters.py b/datman/exporters.py index 451873db..8f80b175 100644 --- a/datman/exporters.py +++ b/datman/exporters.py @@ -29,7 +29,8 @@ make_filename, KCNIIdentifier) from datman.utils import (run, make_temp_directory, get_extension, filter_niftis, find_tech_notes, read_blacklist, - get_relative_source, read_json, write_json) + get_relative_source, read_json, write_json, + parse_err_file) try: @@ -245,8 +246,14 @@ def check_contents(self, expected, actual): ) if os.path.exists(err_file): continue - else: - missing.setdefault(scan, []).append(out_name) + + blacklisted_err = os.path.join( + self.output_dir, "blacklisted", + os.path.basename(out_name) + "_niix.err") + if os.path.exists(blacklisted_err): + continue + + missing.setdefault(scan, []).append(out_name) continue # Ignore split series, we can't handle these right now. 
diff --git a/datman/scan.py b/datman/scan.py index fa555d58..795f0789 100644 --- a/datman/scan.py +++ b/datman/scan.py @@ -9,14 +9,10 @@ """ import glob import os -import re -import logging import datman.scanid import datman.utils -logger = logging.getLogger(__name__) - class DatmanNamed(object): """ @@ -203,7 +199,7 @@ def _make_bids_inventory(self): for item in files: if item.endswith(".err"): err_file = os.path.join(path, item) - ident, series = self._parse_err_file(err_file) + ident, series = datman.utils.parse_err_file(err_file) if ident and ident.session == self.session: inventory.setdefault(series, []).append(err_file) continue @@ -231,27 +227,6 @@ def _make_bids_inventory(self): return inventory - def _parse_err_file(self, fname): - with open(fname, "r") as fh: - lines = fh.readlines() - - regex = ".*<.*Importer (.*) - ([0-9]+)>*" - match = re.match(regex, lines[0]) - if not match: - logger.error(f"Can't parse error file - {fname}") - return None, None - - subid, series = match.groups() - series = int(series) - - try: - ident = datman.scanid.parse(subid) - except datman.scanid.ParseException: - logger.error(f"Unparseable ID found in error file - {subid}") - return None, series - - return ident, series - def get_tagged_nii(self, tag): try: matched_niftis = self.__nii_dict[tag] diff --git a/datman/utils.py b/datman/utils.py index 7d225b86..45795747 100644 --- a/datman/utils.py +++ b/datman/utils.py @@ -1383,3 +1383,34 @@ def read_json(path): def write_json(path, contents): with open(path, "w") as fh: json.dump(contents, fh, indent=4) + +def parse_err_file(fname): + """Parse an error file that was generated during extraction. + + Args: + fname (:obj:`str`): The full path to an error file. + + Returns: + tuple: A tuple of a datman identifier (or None, if a valid ID does + not exist in the error file) and an integer series number (for + the series that failed to extract). 
+ """ + with open(fname, "r") as fh: + lines = fh.readlines() + + regex = ".*<.*Importer (.*) - ([0-9]+)>*" + match = re.match(regex, lines[0]) + if not match: + logger.error(f"Can't parse error file - {fname}") + return None, None + + subid, series = match.groups() + series = int(series) + + try: + ident = scanid.parse(subid) + except scanid.ParseException: + logger.error(f"Unparseable ID found in error file - {subid}") + return None, series + + return ident, series \ No newline at end of file From 0fb9d85f7635638ad118754fefb75be979fde357 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Wed, 30 Apr 2025 15:06:03 -0400 Subject: [PATCH 28/45] [FIX] Ensure zip resources extract to same path as XNAT resources --- bin/dm_xnat_extract.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py index c672d311..95e5cd45 100755 --- a/bin/dm_xnat_extract.py +++ b/bin/dm_xnat_extract.py @@ -626,9 +626,16 @@ def export_resources(resource_dir, xnat, importer, dry_run=False): return if isinstance(importer, datman.importers.ZipImporter): + out_dir = os.path.join(resource_dir, "MISC") + try: + define_folder(out_dir) + except OSError: + logger.error(f"Failed creating target folder: {out_dir}") + return for item in importer.resource_files: - if not os.path.exists(item): - importer.get_resources(resource_dir, item) + dest_item = os.path.join(out_dir, item) + if not os.path.exists(dest_item): + importer.get_resources(out_dir, item) return xnat_experiment = importer From 82e35d6bf515b00a8f7b8cba604d62f63255dabc Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Wed, 30 Apr 2025 19:09:01 -0400 Subject: [PATCH 29/45] [FIX] Stop index error from happening when no gold standards found --- bin/dm_qc_report.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bin/dm_qc_report.py b/bin/dm_qc_report.py index 8571f012..ba71033e 100755 --- a/bin/dm_qc_report.py +++ b/bin/dm_qc_report.py @@ -341,10 +341,15 
@@ def update_dashboard(nii_path, header_ignore=None, header_tolerance=None): db_record = datman.dashboard.get_scan(nii_path) if REMAKE or REFRESH or db_record.is_outdated_header_diffs(): + if db_record.gold_standards: + standard = db_record.gold_standards[0] + else: + standard = None try: db_record.update_header_diffs( - standard=db_record.gold_standards[0], - ignore=header_ignore, tolerance=header_tolerance) + standard=standard, + ignore=header_ignore, + tolerance=header_tolerance) except Exception as e: logger.error( f"Failed generating header diffs for {str(db_record)} due to " From cdcc264e60937630a6f47eec8634b251103682a8 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Wed, 21 May 2025 18:22:47 -0400 Subject: [PATCH 30/45] [FIX] Update function call --- bin/xnat_fetch_sessions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bin/xnat_fetch_sessions.py b/bin/xnat_fetch_sessions.py index cc7a8263..cbbbf583 100755 --- a/bin/xnat_fetch_sessions.py +++ b/bin/xnat_fetch_sessions.py @@ -156,8 +156,7 @@ def download_subjects(xnat, xnat_project, destination): with datman.utils.make_temp_directory() as temp: try: - temp_zip = experiment.download( - xnat, temp, zip_name=zip_name) + temp_zip = experiment.get_files(temp, xnat, zip_name=zip_name) except Exception as e: logger.error("Cant download experiment {}. 
Reason: {}" "".format(experiment, e)) From cc692e7c5affa4ad4b9333eefb38f4a3ab16c176 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Thu, 22 May 2025 16:56:38 -0400 Subject: [PATCH 31/45] [FIX] Make err file regex more general for XNAT import errors --- datman/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datman/utils.py b/datman/utils.py index 45795747..d3935d3c 100644 --- a/datman/utils.py +++ b/datman/utils.py @@ -1398,7 +1398,7 @@ def parse_err_file(fname): with open(fname, "r") as fh: lines = fh.readlines() - regex = ".*<.*Importer (.*) - ([0-9]+)>*" + regex = ".*<.* (.*) - ([0-9]+)>*" match = re.match(regex, lines[0]) if not match: logger.error(f"Can't parse error file - {fname}") From c6b2660348394c4b33114957557a9df2652f0130 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Tue, 3 Jun 2025 19:13:07 -0400 Subject: [PATCH 32/45] [FIX] Started adding a fix for handling repeat sessions --- datman/exporters.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/datman/exporters.py b/datman/exporters.py index 8f80b175..64d42c91 100644 --- a/datman/exporters.py +++ b/datman/exporters.py @@ -327,8 +327,31 @@ def get_xnat_parser(self): xnat_sidecars = [] for scan in self.experiment.scans: xnat_sidecars.append(FakeSidecar(scan)) + + if int(self.session.session) > 1: + # Add repeat number to xnat side cars to avoid mistakenly + # tagging them as repeat 01 + for sidecar in xnat_sidecars: + sidecar.data['Repeat'] = self.session.session + + # This session is a repeat and files from previous scan(s) must + # be included or run numbers will be wrong. 
+ for item in self.find_outputs(".json", start_dir=self.output_dir): + sidecar = dcm2bids.Sidecar(item) + if 'Repeat' not in sidecar.data: + # Assume repeat == 1 if not in json file + xnat_sidecars.append(sidecar) + elif int(sidecar.data['Repeat']) < int(self.session.session): + # Avoid duplicating this sessions' previously exported files + xnat_sidecars.append(sidecar) + + # xnat_sidecars = sorted( + # xnat_sidecars, key=lambda x: int(x.data['SeriesNumber']) + # ) xnat_sidecars = sorted( - xnat_sidecars, key=lambda x: int(x.data['SeriesNumber']) + xnat_sidecars, + key=lambda x: (int(x.data['Repeat'] if 'Repeat' in x.data else 1), + int(x.data['SeriesNumber'])) ) xnat_parser = dcm2bids.SidecarPairing( From 224addb4e90f45a90136e969592d6da42f416acd Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 6 Jun 2025 15:52:35 -0400 Subject: [PATCH 33/45] [FIX] Handle expected scans when repeat sessions exist --- datman/exporters.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/datman/exporters.py b/datman/exporters.py index 64d42c91..e82b2937 100644 --- a/datman/exporters.py +++ b/datman/exporters.py @@ -342,7 +342,8 @@ def get_xnat_parser(self): # Assume repeat == 1 if not in json file xnat_sidecars.append(sidecar) elif int(sidecar.data['Repeat']) < int(self.session.session): - # Avoid duplicating this sessions' previously exported files + # Include previous sessions' scans without duplicating + # the current sessions' entries. 
xnat_sidecars.append(sidecar) # xnat_sidecars = sorted( @@ -556,7 +557,12 @@ def get_xnat_map(self): xnat_parser = self.get_xnat_parser() xnat_map = {} for acq in xnat_parser.acquisitions: - xnat_map.setdefault(acq.srcSidecar.scan, []).append(acq.dstRoot) + try: + xnat_map.setdefault(acq.srcSidecar.scan, []).append(acq.dstRoot) + except AttributeError: + # acqs belonging to previous sessions don't have + # srcSidecar.scan and should not be in xnat_map + pass return xnat_map def get_local_map(self): From b7aafabe449c93b73783790f3ca778ce69e9643a Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 27 Jun 2025 13:47:25 -0400 Subject: [PATCH 34/45] [FIX] Address bug when bids split series exists in repeat session --- datman/exporters.py | 127 ++++++++++++++++++++++++++++++-------------- 1 file changed, 87 insertions(+), 40 deletions(-) diff --git a/datman/exporters.py b/datman/exporters.py index e82b2937..ac8c9046 100644 --- a/datman/exporters.py +++ b/datman/exporters.py @@ -190,6 +190,8 @@ def __init__(self, config, session, experiment, bids_opts=None, **kwargs): self.bids_ses = session._ident.timepoint self.repeat = session._ident.session self.bids_folder = session.bids_root + self.bids_tmp = os.path.join(session.bids_root, "tmp_dcm2bids", + f"{session.bids_sub}_{session.bids_ses}") self.output_dir = session.bids_path self.keep_dcm = bids_opts.keep_dcm if bids_opts else False self.force_dcm2niix = bids_opts.force_dcm2niix if bids_opts else False @@ -256,10 +258,36 @@ def check_contents(self, expected, actual): missing.setdefault(scan, []).append(out_name) continue - # Ignore split series, we can't handle these right now. 
- if len(expected[scan]) != 1: - continue - if len(actual[scan]) != 1: + # Handle split series + if len(expected[scan]) > 1: + xnat_parser = self.get_xnat_parser() + dest_acqs = [] + for acq in xnat_parser.acquisitions: + try: + found_scan = acq.srcSidecar.scan + except AttributeError: + continue + if found_scan == scan: + dest_acqs.append(acq) + + local_parser = self.get_local_parser() + src_acqs = [] + for acq in local_parser.acquisitions: + sidecar = acq.srcSidecar + if str(sidecar.data['SeriesNumber']) in [scan.series, "10" + scan.series]: + src_acqs.append(acq) + + for src_acq in src_acqs: + found = None + suffix = re.sub(r'_run-\d+', '', src_acq.suffix) + for dst_acq in dest_acqs: + if suffix == re.sub(r'_run-\d+', '', dst_acq.suffix): + found = dst_acq + if not found: + continue + expected_name = found.dstRoot + actual_name = src_acq.srcRoot.replace(self.bids_folder, "") + misnamed[actual_name] = expected_name continue expected_name = expected[scan][0] @@ -305,18 +333,29 @@ def write_error_file(self, fname, error_msg): ) def fix_run_numbers(self, misnamed_scans): + # Rename files already in the subject dir first, to + # avoid accidentally clobbering any existing misnamed files + # with os.rename for orig_name in misnamed_scans: - source_path = os.path.join(self.bids_folder, orig_name) - dest_path = os.path.join( - self.bids_folder, misnamed_scans[orig_name] - ) + if not orig_name.startswith("sub-"): + continue + self.rename_scan(orig_name, misnamed_scans[orig_name]) + + for orig_name in misnamed_scans: + if not orig_name.startswith("tmp_dcm2bids"): + continue + self.rename_scan(orig_name, misnamed_scans[orig_name]) + + def rename_scan(self, orig_name, dest_name): + source_path = os.path.join(self.bids_folder, orig_name) + dest_path = os.path.join(self.bids_folder, dest_name) - if not os.path.exists(os.path.dirname(dest_path)): - os.makedirs(os.path.dirname(dest_path)) + if not os.path.exists(os.path.dirname(dest_path)): + 
os.makedirs(os.path.dirname(dest_path)) - for found in glob(source_path + "*"): - _, ext = datman.utils.splitext(found) - os.rename(found, dest_path + ext) + for found in glob(source_path + "*"): + _, ext = datman.utils.splitext(found) + os.rename(found, dest_path + ext) def get_xnat_parser(self): participant = dcm2bids.Participant( @@ -328,11 +367,11 @@ def get_xnat_parser(self): for scan in self.experiment.scans: xnat_sidecars.append(FakeSidecar(scan)) - if int(self.session.session) > 1: + if int(self.repeat) > 1: # Add repeat number to xnat side cars to avoid mistakenly # tagging them as repeat 01 for sidecar in xnat_sidecars: - sidecar.data['Repeat'] = self.session.session + sidecar.data['Repeat'] = self.repeat # This session is a repeat and files from previous scan(s) must # be included or run numbers will be wrong. @@ -341,7 +380,7 @@ def get_xnat_parser(self): if 'Repeat' not in sidecar.data: # Assume repeat == 1 if not in json file xnat_sidecars.append(sidecar) - elif int(sidecar.data['Repeat']) < int(self.session.session): + elif int(sidecar.data['Repeat']) < int(self.repeat): # Include previous sessions' scans without duplicating # the current sessions' entries. 
xnat_sidecars.append(sidecar) @@ -381,16 +420,17 @@ def get_local_parser(self): bids_conf = dcm2bids.load_json(self.dcm2bids_config) - bids_tmp = os.path.join( - self.bids_folder, - "tmp_dcm2bids", - f"{self.session.bids_sub}_{self.session.bids_ses}" - ) - local_sidecars = [] - for search_path in [self.output_dir, bids_tmp]: + for search_path in [self.output_dir, self.bids_tmp]: for item in self.find_outputs(".json", start_dir=search_path): - local_sidecars.append(dcm2bids.Sidecar(item)) + sidecar = dcm2bids.Sidecar(item) + if ('Repeat' in sidecar.data and + sidecar.data['Repeat'] == self.repeat): + local_sidecars.append(sidecar) + elif ('Repeat' not in sidecar.data and self.repeat == '01'): + # Assume untagged sidecars all belong to the first session + local_sidecars.append(sidecar) + local_sidecars = sorted(local_sidecars) parser = dcm2bids.SidecarPairing( @@ -405,12 +445,7 @@ def get_local_parser(self): def _get_scan_dir(self, download_dir): if self.refresh: # Use existing tmp_dir instead of raw dcms - tmp_dir = os.path.join( - self.bids_folder, - "tmp_dcm2bids", - f"sub-{self.bids_sub}_ses-{self.bids_ses}" - ) - return tmp_dir + return self.bids_tmp return os.path.join(download_dir, self.dcm_dir) def outputs_exist(self): @@ -451,6 +486,11 @@ def export(self, raw_data_dir, **kwargs): logger.info(f"Dry run: Skipping bids export to {self.output_dir}") return + # Store user settings in case they change during export + orig_force = self.force_dcm2niix + orig_refresh = self.refresh + + if int(self.repeat) > 1: # Must force dcm2niix export if it's a repeat. self.force_dcm2niix = True @@ -460,7 +500,7 @@ def export(self, raw_data_dir, **kwargs): try: self.run_dcm2bids(raw_data_dir) except Exception as e: - print(f"Failed to extract data. {e}") + logger.error(f"Failed to extract data. {e}") try: self.add_repeat_num() @@ -471,6 +511,18 @@ def export(self, raw_data_dir, **kwargs): "incorrectly be tagged as belonging to the later repeat." 
) + if int(self.repeat) > 1: + # Must run a second time to move the new niftis out of the tmp dir + self.force_dcm2niix = False + self.refresh = True + try: + self.run_dcm2bids(raw_data_dir) + except Exception as e: + logger.error(f"Failed to extract data. {e}") + + self.force_dcm2niix = orig_force + self.refresh = orig_refresh + def run_dcm2bids(self, raw_data_dir, tries=2): if tries == 0: logger.error(f"Dcm2bids failed to run for {self.output_dir}.") @@ -573,7 +625,7 @@ def get_local_map(self): for acq in local_parser.acquisitions: sidecar = acq.srcSidecar if ('Repeat' in sidecar.data and - sidecar.data['Repeat'] != self.session.session): + sidecar.data['Repeat'] != self.repeat): continue if 'SeriesNumber' not in sidecar.data: continue @@ -640,6 +692,7 @@ def find_outputs(self, ext, start_dir=None): def get_sidecars(self): sidecars = self.find_outputs(".json") + sidecars.extend(self.find_outputs(".json", start_dir=self.bids_tmp)) contents = {path: read_json(path) for path in sidecars} return contents @@ -702,18 +755,12 @@ def remove_criteria(descriptions): bids_conf = dcm2bids.load_json(self.dcm2bids_config) - bids_tmp = os.path.join( - self.bids_folder, - "tmp_dcm2bids", - f"{self.session.bids_sub}_{self.session.bids_ses}" - ) - local_sidecars = [] - for search_path in [self.output_dir, bids_tmp]: + for search_path in [self.output_dir, self.bids_tmp]: for item in self.find_outputs(".json", start_dir=search_path): sidecar = dcm2bids.Sidecar(item) if ('Repeat' in sidecar.data and - sidecar.data['Repeat'] != self.session.session): + sidecar.data['Repeat'] != self.repeat): continue local_sidecars.append(sidecar) local_sidecars = sorted(local_sidecars) From 755b5dbbbd5f311bbba9a710c087418771f20fbf Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 27 Jun 2025 18:55:07 -0400 Subject: [PATCH 35/45] [FIX] Add check for direction as well, if configured --- datman/exporters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datman/exporters.py b/datman/exporters.py 
index ac8c9046..617a4fb8 100644 --- a/datman/exporters.py +++ b/datman/exporters.py @@ -1005,6 +1005,7 @@ def _find_matching_files(self, bids_names, bids_conf): matches = self._filter_bids( matches, bids_conf.get(self._get_label_key(bids_conf))) matches = self._filter_bids(matches, bids_conf.get('task')) + matches = self._filter_bids(matches, bids_conf.get('dir')) # The below is used to more accurately match FMAP tags matches = self._filter_bids(matches, bids_conf.get('match_acq')) return matches From c45ab66aea99c14a1e57482a2f728b9bbbb12235 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 25 Jul 2025 15:58:17 -0400 Subject: [PATCH 36/45] [REF] Reorganize the exporters into submodules To prep for multiple versions of dcm2bids I've divided up the exporters and the bids classes will now be imported based on user environment. --- datman/exporters/__init__.py | 102 +++ datman/exporters/base.py | 122 +++ .../bids_legacy.py} | 794 +----------------- datman/exporters/dashboard.py | 399 +++++++++ datman/exporters/legacy.py | 225 +++++ 5 files changed, 875 insertions(+), 767 deletions(-) create mode 100644 datman/exporters/__init__.py create mode 100644 datman/exporters/base.py rename datman/{exporters.py => exporters/bids_legacy.py} (60%) create mode 100644 datman/exporters/dashboard.py create mode 100644 datman/exporters/legacy.py diff --git a/datman/exporters/__init__.py b/datman/exporters/__init__.py new file mode 100644 index 00000000..63c62252 --- /dev/null +++ b/datman/exporters/__init__.py @@ -0,0 +1,102 @@ +import os +import importlib +import pkgutil +import logging + +from .base import Exporter, SessionExporter, SeriesExporter + +logger = logging.getLogger(__name__) + +# Exclude bids from import until it's known which (if any) version of +# dcm2bids is in use +_exclude = {"bids", "bids_legacy"} + +__all__ = [] + + +def _load_contents(module_name): + """Load the contents of a module file in the 'exporters' folder. 
+ """ + module = importlib.import_module(f".{module_name}", package=__name__) + + if hasattr(module, "__all__"): + contents = module.__all__ + else: + contents = [item for item in dir(module) if not item.startswith("_")] + + for item in contents: + globals()[item] = getattr(module, item) + + __all__.extend(contents) + + +# Load everything from exporters folder (except bids exporters) so contents +# can be accessed as 'datman.exporters' instead of 'datman.exporters.xxx' +for _, module_name, _ in pkgutil.iter_modules([os.path.dirname(__file__)]): + if module_name in _exclude: + continue + _load_contents(module_name) + +# Load the appropriate version of the bids exporters (if any) +DCM2BIDS_FOUND = False + +if os.getenv("BIDS_CONTAINER"): + # Container is in use, load bids.py + _load_contents("bids") + DCM2BIDS_FOUND = True +else: + try: + from dcm2bids import dcm2bids, Dcm2bids + except ImportError: + # dcm2bids is either not installed or version >= 3 + try: + import dcm2bids + except ImportError: + # No dcm2bids available at all + DCM2BIDS_FOUND = False + else: + # dcm2bids is installed and version > 3, use bids.py + _load_contents("bids") + DCM2BIDS_FOUND = True + else: + # dcm2bids is installed and version < 3, use bids_legacy.py + _load_contents("bids_legacy") + DCM2BIDS_FOUND = True + + +def get_exporter(key: str, scope="series") -> Exporter: + """Find an exporter class for a given key identifier. + + Args: + key (:obj:`str`): The 'type' identifier of a defined exporter (e.g. + 'nii'). + scope (:obj:`str`, optional): Whether to search for a series or session + exporter. Defaults to 'series'. + + Returns: + :obj:`datman.exporters.base.Exporter`: The Exporter subclass + if one is defined, or else None. 
+ """ + if scope == "series": + exp_set = SERIES_EXPORTERS + else: + exp_set = SESSION_EXPORTERS + + try: + exporter = exp_set[key] + except KeyError: + logger.error( + f"Unrecognized format {key} for {scope}, no exporters found.") + return None + return exporter + + +SESSION_EXPORTERS = { + exp.type: exp for exp in SessionExporter.__subclasses__() +} + +SERIES_EXPORTERS = { + exp.type: exp for exp in SeriesExporter.__subclasses__() +} + +__all__.extend(["get_exporter", "SESSION_EXPORTERS", "SERIES_EXPORTERS"]) diff --git a/datman/exporters/base.py b/datman/exporters/base.py new file mode 100644 index 00000000..6cd81237 --- /dev/null +++ b/datman/exporters/base.py @@ -0,0 +1,122 @@ +"""Base classes to use for any datman exporter. + +To allow datman to export to a new format or organizational style create a +class that inherits from either SessionExporter if it must work on an entire +scan session at once, or a SeriesExporter if it works on a single individual +scan series at a time. +""" + +from abc import ABC, abstractmethod +import os +import logging + +logger = logging.getLogger(__name__) + +__all__ = ["SeriesExporter", "SessionExporter"] + + +class Exporter(ABC): + """An abstract base class for all Exporters. + """ + + # Subclasses must define this + type = None + + @classmethod + def get_output_dir(cls, session): + """Retrieve the exporter's output dir without needing an instance. + """ + return getattr(session, f"{cls.type}_path") + + @abstractmethod + def outputs_exist(self): + """Whether outputs have already been generated for this Exporter. + + Returns: + bool: True if all expected outputs exist, False otherwise. + """ + + @abstractmethod + def needs_raw_data(self): + """Whether raw data must be downloaded for the Exporter. + + Returns: + bool: True if raw data must be given, False otherwise. Note that + False may be returned if outputs already exist. 
+ """ + + @abstractmethod + def export(self, raw_data_dir, **kwargs): + """Exports raw data to the current Exporter's format. + + Args: + raw_data_dir (:obj:`str`): The directory that contains the + downloaded raw data. + """ + + def make_output_dir(self): + """Creates the directory where the Exporter's outputs will be stored. + + Returns: + bool: True if directory exists (or isn't needed), False otherwise. + """ + try: + os.makedirs(self.output_dir) + except FileExistsError: + pass + except AttributeError: + logger.debug(f"output_dir not defined for {self}") + except PermissionError: + logger.error(f"Failed to make output dir {self.output_dir} - " + "PermissionDenied.") + return False + return True + + +class SessionExporter(Exporter): + """A base class for exporters that take an entire session as input. + + Subclasses should override __init__ (without changing basic input args) + and call super().__init__(config, session, experiment, **kwargs). + + The init function for SessionExporter largely exists to define expected + input arguments and set some universally needed attributes. + """ + + def __init__(self, config, session, experiment, dry_run=False, **kwargs): + self.experiment = experiment + self.config = config + self.session = session + self.dry_run = dry_run + + def __repr__(self): + fq_name = str(self.__class__).replace("", "") + name = fq_name.rsplit(".", maxsplit=1)[-1] + return f"<{name} - {self.experiment.name}>" + + +class SeriesExporter(Exporter): + """A base class for exporters that take a single series as input. 
+ """ + + # Subclasses should set this + ext = None + + def __init__(self, output_dir, fname_root, echo_dict=None, dry_run=False, + **kwargs): + self.output_dir = output_dir + self.fname_root = fname_root + self.echo_dict = echo_dict + self.dry_run = dry_run + + def outputs_exist(self): + return os.path.exists( + os.path.join(self.output_dir, self.fname_root + self.ext)) + + def needs_raw_data(self): + return not self.outputs_exist() + + def __repr__(self): + fq_name = str(self.__class__).replace("", "") + name = fq_name.rsplit(".", maxsplit=1)[-1] + return f"<{name} - {self.fname_root}>" diff --git a/datman/exporters.py b/datman/exporters/bids_legacy.py similarity index 60% rename from datman/exporters.py rename to datman/exporters/bids_legacy.py index 617a4fb8..ab9d06d1 100644 --- a/datman/exporters.py +++ b/datman/exporters/bids_legacy.py @@ -1,181 +1,34 @@ -"""Functions to export data into different file formats and organizations. +"""Export to bids format when using dcmbids versions below '3'. -To allow datman to export to a new format make a subclass of SessionExporter -or SeriesExporter depending on whether the new format requires data from -a complete scan session or a single series, respectively. The new subclass -should implement all abstract methods, including 'export' which does the -actual work of generating outputs. +For dcm2bids versions 3 and higher (or dcm2bids versions accessed via +container) 'dcm2bids', 'Dcm2bids' and 'Acquisition' are not accessible so these +exporters cannot be used. -Also, ensure that subclasses define the 'type' attribute to be a short -unique key that can be referenced in config files (e.g. 'nii'). +When using versions below '3' though, this exporter has advantages over the +newer one. Namely, its outputs_exist() method can better check the actual +contents of the folder against what we expect to have been exported (reducing +manual intervention). 
It can also force dcm2bids to properly export repeat +sessions into the same folder, where newer versions will simply ignore them. """ -from abc import ABC, abstractmethod from collections import OrderedDict -from datetime import datetime from glob import glob from json import JSONDecodeError import logging import os import re -import pydicom as dicom - -import datman.config -import datman.dashboard -import datman.scan -from datman.exceptions import (UndefinedSetting, DashboardException, - ConfigException) -from datman.scanid import (parse_bids_filename, ParseException, - make_filename, KCNIIdentifier) -from datman.utils import (run, make_temp_directory, get_extension, - filter_niftis, find_tech_notes, read_blacklist, - get_relative_source, read_json, write_json, - parse_err_file) - - -try: - from dcm2bids import dcm2bids, Dcm2bids - from dcm2bids.sidecar import Acquisition -except ImportError: - DCM2BIDS_FOUND = False -else: - DCM2BIDS_FOUND = True - -logger = logging.getLogger(__name__) - - -def get_exporter(key, scope="series"): - """Find an exporter class for a given key identifier. - - Args: - key (:obj:`str`): The 'type' identifier of a defined exporter (e.g. - 'nii'). - scope (:obj:`str`, optional): Whether to search for a series or session - exporter. Defaults to 'series'. - - Returns: - :obj:`datman.exporters.Exporter`: The Exporter subclass for the type, - if one is defined, or else None. - """ - if scope == "series": - exp_set = SERIES_EXPORTERS - else: - exp_set = SESSION_EXPORTERS - - try: - exporter = exp_set[key] - except KeyError: - logger.error( - f"Unrecognized format {key} for {scope}, no exporters found.") - return None - return exporter - - -class Exporter(ABC): - """An abstract base class for all Exporters. - """ - - # Subclasses must define this - type = None - - @classmethod - def get_output_dir(cls, session): - """Retrieve the exporter's output dir without needing an instance. 
- """ - return getattr(session, f"{cls.type}_path") - - @abstractmethod - def outputs_exist(self): - """Whether outputs have already been generated for this Exporter. - - Returns: - bool: True if all expected outputs exist, False otherwise. - """ - - @abstractmethod - def needs_raw_data(self): - """Whether raw data must be downloaded for the Exporter. - - Returns: - bool: True if raw data must be given, False otherwise. Note that - False may be returned if outputs already exist. - """ - - @abstractmethod - def export(self, raw_data_dir, **kwargs): - """Exports raw data to the current Exporter's format. - - Args: - raw_data_dir (:obj:`str`): The directory that contains the - downloaded raw data. - """ - - def make_output_dir(self): - """Creates the directory where the Exporter's outputs will be stored. - - Returns: - bool: True if directory exists (or isn't needed), False otherwise. - """ - try: - os.makedirs(self.output_dir) - except FileExistsError: - pass - except AttributeError: - logger.debug(f"output_dir not defined for {self}") - except PermissionError: - logger.error(f"Failed to make output dir {self.output_dir} - " - "PermissionDenied.") - return False - return True +from datman.scanid import make_filename +from datman.utils import (splitext, get_extension, write_json, read_json, + filter_niftis, read_blacklist, get_relative_source) +from dcm2bids import dcm2bids, Dcm2bids +from dcm2bids.sidecar import Acquisition -class SessionExporter(Exporter): - """A base class for exporters that take an entire session as input. +from .base import SessionExporter - Subclasses should override __init__ (without changing basic input args) - and call super().__init__(config, session, experiment, **kwargs). - - The init function for SessionExporter largely exists to define expected - input arguments and set some universally needed attributes. 
- """ - - def __init__(self, config, session, experiment, dry_run=False, **kwargs): - self.experiment = experiment - self.config = config - self.session = session - self.dry_run = dry_run - - def __repr__(self): - fq_name = str(self.__class__).replace("", "") - name = fq_name.rsplit(".", maxsplit=1)[-1] - return f"<{name} - {self.experiment.name}>" - - -class SeriesExporter(Exporter): - """A base class for exporters that take a single series as input. - """ - - # Subclasses should set this - ext = None - - def __init__(self, output_dir, fname_root, echo_dict=None, dry_run=False, - **kwargs): - self.output_dir = output_dir - self.fname_root = fname_root - self.echo_dict = echo_dict - self.dry_run = dry_run - - def outputs_exist(self): - return os.path.exists( - os.path.join(self.output_dir, self.fname_root + self.ext)) - - def needs_raw_data(self): - return not self.outputs_exist() +logger = logging.getLogger(__name__) - def __repr__(self): - fq_name = str(self.__class__).replace("", "") - name = fq_name.rsplit(".", maxsplit=1)[-1] - return f"<{name} - {self.fname_root}>" +__all__ = ["BidsExporter", "NiiLinkExporter"] class BidsExporter(SessionExporter): @@ -274,7 +127,8 @@ def check_contents(self, expected, actual): src_acqs = [] for acq in local_parser.acquisitions: sidecar = acq.srcSidecar - if str(sidecar.data['SeriesNumber']) in [scan.series, "10" + scan.series]: + if str(sidecar.data['SeriesNumber']) in [ + scan.series, "10" + scan.series]: src_acqs.append(acq) for src_acq in src_acqs: @@ -354,7 +208,7 @@ def rename_scan(self, orig_name, dest_name): os.makedirs(os.path.dirname(dest_path)) for found in glob(source_path + "*"): - _, ext = datman.utils.splitext(found) + _, ext = splitext(found) os.rename(found, dest_path + ext) def get_xnat_parser(self): @@ -477,10 +331,11 @@ def export(self, raw_data_dir, **kwargs): if self.outputs_exist(): return - if not DCM2BIDS_FOUND: - logger.info(f"Unable to export to {self.output_dir}, " - "Dcm2Bids not found.") - return 
+ # Was this ever needed? The class should never have been made. + # if not DCM2BIDS_FOUND: + # logger.info(f"Unable to export to {self.output_dir}, " + # "Dcm2Bids not found.") + # return if self.dry_run: logger.info(f"Dry run: Skipping bids export to {self.output_dir}") @@ -490,7 +345,6 @@ def export(self, raw_data_dir, **kwargs): orig_force = self.force_dcm2niix orig_refresh = self.refresh - if int(self.repeat) > 1: # Must force dcm2niix export if it's a repeat. self.force_dcm2niix = True @@ -610,7 +464,8 @@ def get_xnat_map(self): xnat_map = {} for acq in xnat_parser.acquisitions: try: - xnat_map.setdefault(acq.srcSidecar.scan, []).append(acq.dstRoot) + xnat_map.setdefault(acq.srcSidecar.scan, []).append( + acq.dstRoot) except AttributeError: # acqs belonging to previous sessions don't have # srcSidecar.scan and should not be in xnat_map @@ -1164,601 +1019,6 @@ def make_link(self, dm_file, bids_file): logger.error(f"Failed to create {target}. Reason - {exc}") -class DBExporter(SessionExporter): - """Add a datman-style session and its contents to datman's QC dashboard. - """ - - type = "db" - - def __init__(self, config, session, experiment, **kwargs): - try: - study_resource_dir = config.get_path("resources") - except UndefinedSetting: - study_resource_dir = "" - - try: - resources_dir = os.path.join( - config.get_path("resources"), - session._ident.get_full_subjectid_with_timepoint_session() - ) - except UndefinedSetting: - resources_dir = "" - - self.nii_path = session.nii_path - self.output_dir = None - self.ident = session._ident - self.study_resource_path = study_resource_dir - self.resources_path = resources_dir - self.date = experiment.date - super().__init__(config, session, experiment, **kwargs) - - @property - def names(self): - """Gets list of valid datman-style scan names for a session. - - Returns: - :obj:`dict`: A dictionary of datman style scan names mapped to - the bids style name if one can be found, otherwise, an - empty string. 
- """ - names = {} - # use experiment.scans, so dashboard can report scans that didnt export - for scan in self.experiment.scans: - for name in scan.names: - names[name] = self.get_bids_name(name, self.session) - - # Check the actual folder contents as well, in case symlinked scans - # exist that werent named on XNAT - for nii in self.session.niftis: - fname = nii.file_name.replace(nii.ext, "") - if fname in names: - continue - names[fname] = self.get_bids_name(fname, self.session) - - return names - - def get_bids_name(self, dm_name, session): - """Get BIDS style scan name from a datman style nifti. - - Returns: - str: A valid bids style file name or an empty string if one - cannot be found. - """ - found = [item for item in session.find_files(dm_name) - if ".nii.gz" in item] - if not found or not os.path.islink(found[0]): - return "" - bids_src = os.readlink(found[0]) - bids_name = os.path.basename(bids_src) - return bids_name.replace(get_extension(bids_name), "") - - def export(self, *args, **kwargs): - if self.dry_run: - logger.info("Dry run: Skipping database update for " - f"{str(self.ident)}") - return - - if not datman.dashboard.dash_found: - logger.warning("Dashboard database not found, unable to add " - f"{str(self.ident)} and its contents.") - return - - session = self.make_session() - - if not session.tech_notes and session.expects_notes(): - self.add_tech_notes(session) - - for file_stem in self.names: - self.make_scan(file_stem) - - def outputs_exist(self): - try: - session = datman.dashboard.get_session(self.ident) - except DashboardException: - return False - except ParseException: - logger.error( - f"Session name {self.ident} is not datman format. 
Ignoring.") - return True - - if not session: - return False - - if not session.tech_notes and session.expects_notes(): - return False - - for name in self.names: - try: - scan = datman.dashboard.get_scan(name) - except DashboardException: - return False - except ParseException: - logger.error( - f"Scan name {name} is not datman format. Ignoring.") - continue - - if not scan: - return False - - if self.errors_outdated(scan, name): - return False - - return True - - @classmethod - def get_output_dir(cls, session): - return None - - def needs_raw_data(self): - return False - - def make_session(self): - """Add the current session to datman's QC database. - - Returns: - :obj:`dashboard.models.Session`: The created scan session or None. - """ - logger.debug(f"Adding session {str(self.ident)} to dashboard.") - try: - session = datman.dashboard.get_session(self.ident, create=True) - except datman.dashboard.DashboardException as exc: - logger.error(f"Failed adding session {str(self.ident)} to " - f"database. Reason: {exc}") - return None - - self._set_alt_ids(session) - self._set_date(session) - - return session - - def _set_alt_ids(self, session): - """Add alternate ID formats for the scan session to the database. - - Args: - session (:obj:`dashboard.models.Session`): A valid QC dashboard - scan session. - """ - session.timepoint.bids_name = self.ident.get_bids_name() - session.timepoint.bids_session = self.ident.timepoint - session.save() - - if not isinstance(self.ident, KCNIIdentifier): - return - - session.timepoint.kcni_name = self.ident.get_xnat_subject_id() - session.kcni_name = self.ident.get_xnat_experiment_id() - session.save() - return - - def _set_date(self, session): - """Add the scan date for a scan session to the QC database. - - Args: - session (:obj:`dashboard.models.Session`): A valid QC dashboard - scan session. 
- """ - if not self.date: - logger.debug(f"No scan date found for {str(self.ident)}, " - "leaving blank.") - return - - try: - date = datetime.strptime(self.date, '%Y-%m-%d') - except ValueError: - logger.error(f"Invalid scan date {self.date} for session " - f"{str(self.ident)}") - return - - if date == session.date: - return - - session.date = date - session.save() - - def add_tech_notes(self, session): - """Add the path to a scan session's tech notes to the database. - - Args: - session (:obj:`dashboard.models.Session`): A valid QC dashboard - scan session. - """ - notes = find_tech_notes(self.resources_path) - if not notes: - logger.debug(f"No tech notes found in {self.resources_path}") - return - - # Store only the path relative to the resources dir - session.tech_notes = notes.replace( - self.study_resource_path, "").lstrip("/") - session.save() - - def make_scan(self, file_stem): - """Add a single scan to datman's QC dashboard. - - Args: - file_stem (:obj:`str`): A valid datman-style file name. - """ - logger.debug(f"Adding scan {file_stem} to dashboard.") - try: - scan = datman.dashboard.get_scan(file_stem, create=True) - except datman.dashboard.DashboardException as exc: - logger.error(f"Failed adding scan {file_stem} to dashboard " - f"with error: {exc}") - return - if self.experiment.is_shared(): - source_session = self._get_source_session() - self._make_linked(scan, source_session) - self._add_bids_scan_name(scan, file_stem) - self._add_side_car(scan, file_stem) - self._update_conversion_errors(scan, file_stem) - - def _make_linked(self, scan, source_session): - try: - source_session = datman.dashboard.get_session(source_session) - except datman.dashboard.DashboardException as exc: - logger.error( - f"Failed to link shared scan {scan} to source " - f"{source_session}. 
Reason - {exc}" - ) - return - matches = [ - source_scan for source_scan in source_session.scans - if (source_scan.series == scan.series and - source_scan.tag == scan.tag) - ] - if not matches or len(matches) > 1: - logger.error( - f"Failed to link shared scan {scan} to {source_session}." - " Reason - Unable to find source scan database record." - ) - return - - scan.source_id = matches[0].id - scan.save() - - def _get_source_session(self): - """Get the ID of the source experiment for a shared XNATExperiment.""" - try: - config = datman.config.config(study=self.experiment.source_name) - except ConfigException: - return self.experiment.source_name - - try: - id_map = config.get_key('IdMap') - except UndefinedSetting: - return self.experiment.source_name - - return str(datman.scanid.parse(self.experiment.source_name, id_map)) - - def _add_bids_scan_name(self, scan, dm_stem): - """Add a bids format file name to a series in the QC database. - - Args: - scan (:obj:`dashboard.models.Scan`): A QC dashboard scan. - dm_stem (:obj:`str`): A valid bids format scan name, or an - empty string if the update should be skipped. - """ - bids_stem = self.names[dm_stem] - if not bids_stem: - return - - try: - bids_ident = parse_bids_filename(bids_stem) - except ParseException: - logger.debug(f"Failed to parse bids file name {bids_stem}") - return - scan.add_bids(str(bids_ident)) - - def _add_side_car(self, scan, file_stem): - """Add the JSON side car contents to the QC database. - - Args: - scan (:obj:`dashboard.models.Scan`): A QC dashboard scan. - file_stem (:obj:`str`): A valid datman-style file name. Used to - find the json side car file. - """ - nii_file = self._get_file(file_stem, ".nii.gz") - if not nii_file: - # File exists on xnat but hasnt been generated. 
- return - - side_car = self._get_file(file_stem, ".json") - if not side_car: - logger.error(f"Missing json side car for {file_stem}") - return - - try: - scan.add_json(side_car) - except Exception as exc: - logger.error("Failed to add JSON side car to dashboard " - f"record for {side_car}. Reason - {exc}") - - def _update_conversion_errors(self, scan, file_stem): - """Add any dcm2niix conversion errors to the QC database. - - Args: - scan (:obj:`dashboard.models.Scan`): A QC dashboard scan. - file_stem (:obj:`str`): A valid datman style file name. Used to - find the conversion error file (if one exists). - """ - convert_errors = self._get_file(file_stem, ".err") - if not convert_errors: - if scan.conv_errors: - # Erase the error message from the DB, because it - # has been resolved. - scan.add_error(None) - return - message = self._read_file(convert_errors) - scan.add_error(message) - - def _get_file(self, fname, ext): - """Find a file on the file system. - - Args: - fname (:obj:`str`): A file name (minus extension). - ext (:obj:`str`): A file extension. - - Returns: - str: The full path to the file matching the given name and - extension, otherwise None. - """ - found = os.path.join(self.nii_path, fname + ext) - if not os.path.exists(found): - bl_found = os.path.join(self.nii_path, 'blacklisted', fname + ext) - if os.path.exists(bl_found): - return bl_found - logger.debug(f"File not found {found}") - return None - return found - - def _read_file(self, fpath): - """Read the contents of a file. - - Args: - fpath (:obj:`str`): The full path to a file. - - Returns: - str: The contents of the file or None if the file cannot be read. 
- """ - try: - with open(fpath, "r") as file_handle: - message = file_handle.readlines() - except Exception as exc: - logger.debug(f"Can't read file {fpath} - {exc}") - return None - return message - - def errors_outdated(self, scan, fname): - err_file = self._get_file(fname, ".err") - if not err_file and scan.conv_errors: - # Error is resolved, but still appears in database - return True - if err_file and not scan.conv_errors: - # Error has appeared, but isnt recorded in database - return True - if err_file and scan.conv_errors: - # Error exists in both locations, but may have changed - message = self._read_file(err_file) - if isinstance(message, list): - message = "\n".join(message) - return message != scan.conv_errors - return False - - -class NiiExporter(SeriesExporter): - """Export a series to nifti format with datman-style names. - """ - - ext = ".nii.gz" - - type = "nii" - - def export(self, raw_data_dir, **kwargs): - if self.dry_run: - logger.info(f"Dry run: Skipping export of {self.fname_root}") - return - - if self.outputs_exist(): - logger.debug(f"Outputs exist for {self.fname_root}, skipping.") - return - - self.make_output_dir() - - with make_temp_directory(prefix="export_nifti_") as tmp: - _, log_msgs = run(f'dcm2niix -z y -b y -o {tmp} {raw_data_dir}', - self.dry_run) - for tmp_file in glob(f"{tmp}/*"): - self.move_file(tmp_file) - stem = self._get_fname(tmp_file) - self.report_issues(stem, str(log_msgs)) - - def move_file(self, gen_file): - """Move the temp outputs of dcm2niix to the intended output directory. - - Args: - gen_file (:obj:`str`): The full path to the generated nifti file - to move. - """ - fname = self._get_fname(gen_file) - - if not fname: - return - - out_file = os.path.join(self.output_dir, fname) - if os.path.exists(out_file): - logger.info(f"Output {out_file} already exists. 
Skipping.") - return - - return_code, _ = run(f"mv {gen_file} {out_file}", self.dry_run) - if return_code: - logger.debug(f"Moving dcm2niix output {gen_file} to {out_file} " - "has failed.") - - def _get_fname(self, gen_file): - """Get the intended datman-style name for a generated file. - - Args: - gen_file (:obj:`str`): The full path to the generated nifti file - to move. - - Result: - str: A string filename (with extension) or an empty string. - """ - ext = get_extension(gen_file) - bname = os.path.basename(gen_file) - - if self.echo_dict: - stem = self._get_echo_fname(bname, ext) - if stem != self.fname_root: - # File belongs to the wrong echo, skip it - return "" - else: - stem = self.fname_root - return stem + ext - - def _get_echo_fname(self, fname, ext): - """Get a valid datman-style file name from a multiecho file. - - Args: - fname (:obj:`str`): A filename to parse for an echo number. - ext (:obj:`str`): The file extension to use. - - Returns: - str: A valid datman-style file name or an empty string if one - cannot be made. - """ - # Match a 14 digit timestamp and 1-3 digit series num - regex = "files_(.*)_([0-9]{14})_([0-9]{1,3})(.*)?" + ext - match = re.search(regex, fname) - - if not match: - logger.error(f"Can't parse valid echo number from {fname}.") - return "" - - try: - echo = int(match.group(4).split('e')[-1][0]) - stem = self.echo_dict[echo] - except Exception: - logger.error(f"Can't parse valid echo number from {fname}") - return "" - - return stem - - def report_issues(self, stem, messages): - """Write an error log if dcm2niix had errors during conversion. - - Args: - stem (:obj:`stem`): A valid datman-style file name (minus - extension). - messages (:obj:`str`): Error messages to write. 
- """ - if self.dry_run: - logger.info(f"DRYRUN - Skipping write of error log for {stem}") - return - - if 'missing images' not in messages: - # The only issue we care about currently is if files are missing - return - - dest = os.path.join(self.output_dir, stem) + ".err" - self._write_error_log(dest, messages) - - def _write_error_log(self, dest, messages): - """Write an error message to the file system. - - Args: - dest (:obj:`str`): The full path of the file to write. - messages (:obj:`str`): Intended contents of the error log. - """ - try: - with open(dest, "w") as output: - output.write(messages) - except Exception as exc: - logger.error(f"Failed writing dcm2niix errors to {dest}. " - f"Reason - {type(exc).__name__} {exc} ") - - -class DcmExporter(SeriesExporter): - """Export a single dicom from a scan. - """ - - type = "dcm" - ext = ".dcm" - - def export(self, raw_data_dir, **kwargs): - self.make_output_dir() - - if self.echo_dict: - self._export_multi_echo(raw_data_dir) - return - - dcm_file = self._find_dcm(raw_data_dir) - if not dcm_file: - logger.error(f"No dicom files found in {raw_data_dir}") - return - - logger.debug(f"Exporting a dcm file from {raw_data_dir} to " - f"{self.output_dir}") - output = os.path.join(self.output_dir, self.fname_root + self.ext) - run(f"cp {dcm_file} {output}", self.dry_run) - - def _find_dcm(self, raw_data_dir): - """Find the path to a valid dicom in the given directory. - - Args: - raw_data_dir (:obj:`str`): The full path to the directory where - raw dicoms were downloaded for the series. - - Returns: - str: the full path to the first readable dicom found. - """ - for path in glob(f"{raw_data_dir}/*"): - try: - dicom.read_file(path) - except dicom.filereader.InvalidDicomError: - pass - else: - return path - return "" - - def _export_multi_echo(self, raw_data_dir): - """Find a single valid dicom for each echo in a multiecho scan. 
- - Args: - raw_data_dir (:obj:`str`): The full path to the directory where - raw dicoms were downloaded for the series. - """ - dcm_dict = {} - for path in glob(f"{raw_data_dir}/*"): - try: - dcm_file = dicom.read_file(path) - except dicom.filereader.InvalidDicomError: - continue - dcm_echo_num = dcm_file.EchoNumbers - if dcm_echo_num not in dcm_dict: - dcm_dict[int(dcm_echo_num)] = path - if len(dcm_dict) == len(self.echo_dict): - break - - for echo_num, dcm_echo_num in zip(self.echo_dict.keys(), - dcm_dict.keys()): - output_file = os.path.join(self.output_dir, - self.echo_dict[echo_num] + self.ext) - logger.debug(f"Exporting a dcm file from {raw_data_dir} to " - f"{output_file}") - cmd = f"cp {dcm_dict[dcm_echo_num]} {output_file}" - run(cmd, self.dry_run) - - -SESSION_EXPORTERS = { - exp.type: exp for exp in SessionExporter.__subclasses__() -} - -SERIES_EXPORTERS = { - exp.type: exp for exp in SeriesExporter.__subclasses__() -} - - class FakeSidecar(dcm2bids.Sidecar): """Turns XNAT series descriptions into pseudo-sidecars. """ diff --git a/datman/exporters/dashboard.py b/datman/exporters/dashboard.py new file mode 100644 index 00000000..86cdd290 --- /dev/null +++ b/datman/exporters/dashboard.py @@ -0,0 +1,399 @@ +"""An exporter to push raw datman files into the QC dashboard. +""" +from datetime import datetime +import logging +import os + +from .base import SessionExporter +import datman.config +import datman.dashboard +from datman.exceptions import (ConfigException, DashboardException, + UndefinedSetting) +from datman.scanid import (KCNIIdentifier, parse, parse_bids_filename, + ParseException) +from datman.utils import find_tech_notes, get_extension + +logger = logging.getLogger(__name__) + +__all__ = ["DBExporter"] + + +class DBExporter(SessionExporter): + """Add a datman-style session and its contents to datman's QC dashboard. 
+ """ + + type = "db" + + def __init__(self, config, session, experiment, **kwargs): + try: + study_resource_dir = config.get_path("resources") + except UndefinedSetting: + study_resource_dir = "" + + try: + resources_dir = os.path.join( + config.get_path("resources"), + session._ident.get_full_subjectid_with_timepoint_session() + ) + except UndefinedSetting: + resources_dir = "" + + self.nii_path = session.nii_path + self.output_dir = None + self.ident = session._ident + self.study_resource_path = study_resource_dir + self.resources_path = resources_dir + self.date = experiment.date + super().__init__(config, session, experiment, **kwargs) + + @property + def names(self): + """Gets list of valid datman-style scan names for a session. + + Returns: + :obj:`dict`: A dictionary of datman style scan names mapped to + the bids style name if one can be found, otherwise, an + empty string. + """ + names = {} + # use experiment.scans, so dashboard can report scans that didnt export + for scan in self.experiment.scans: + for name in scan.names: + names[name] = self.get_bids_name(name, self.session) + + # Check the actual folder contents as well, in case symlinked scans + # exist that werent named on XNAT + for nii in self.session.niftis: + fname = nii.file_name.replace(nii.ext, "") + if fname in names: + continue + names[fname] = self.get_bids_name(fname, self.session) + + return names + + def get_bids_name(self, dm_name, session): + """Get BIDS style scan name from a datman style nifti. + + Returns: + str: A valid bids style file name or an empty string if one + cannot be found. 
+ """ + found = [item for item in session.find_files(dm_name) + if ".nii.gz" in item] + if not found or not os.path.islink(found[0]): + return "" + bids_src = os.readlink(found[0]) + bids_name = os.path.basename(bids_src) + return bids_name.replace(get_extension(bids_name), "") + + def export(self, *args, **kwargs): + if self.dry_run: + logger.info("Dry run: Skipping database update for " + f"{str(self.ident)}") + return + + if not datman.dashboard.dash_found: + logger.warning("Dashboard database not found, unable to add " + f"{str(self.ident)} and its contents.") + return + + session = self.make_session() + + if not session.tech_notes and session.expects_notes(): + self.add_tech_notes(session) + + for file_stem in self.names: + self.make_scan(file_stem) + + def outputs_exist(self): + try: + session = datman.dashboard.get_session(self.ident) + except DashboardException: + return False + except ParseException: + logger.error( + f"Session name {self.ident} is not datman format. Ignoring.") + return True + + if not session: + return False + + if not session.tech_notes and session.expects_notes(): + return False + + for name in self.names: + try: + scan = datman.dashboard.get_scan(name) + except DashboardException: + return False + except ParseException: + logger.error( + f"Scan name {name} is not datman format. Ignoring.") + continue + + if not scan: + return False + + if self.errors_outdated(scan, name): + return False + + return True + + @classmethod + def get_output_dir(cls, session): + return None + + def needs_raw_data(self): + return False + + def make_session(self): + """Add the current session to datman's QC database. + + Returns: + :obj:`dashboard.models.Session`: The created scan session or None. 
+ """ + logger.debug(f"Adding session {str(self.ident)} to dashboard.") + try: + session = datman.dashboard.get_session(self.ident, create=True) + except datman.dashboard.DashboardException as exc: + logger.error(f"Failed adding session {str(self.ident)} to " + f"database. Reason: {exc}") + return None + + self._set_alt_ids(session) + self._set_date(session) + + return session + + def _set_alt_ids(self, session): + """Add alternate ID formats for the scan session to the database. + + Args: + session (:obj:`dashboard.models.Session`): A valid QC dashboard + scan session. + """ + session.timepoint.bids_name = self.ident.get_bids_name() + session.timepoint.bids_session = self.ident.timepoint + session.save() + + if not isinstance(self.ident, KCNIIdentifier): + return + + session.timepoint.kcni_name = self.ident.get_xnat_subject_id() + session.kcni_name = self.ident.get_xnat_experiment_id() + session.save() + return + + def _set_date(self, session): + """Add the scan date for a scan session to the QC database. + + Args: + session (:obj:`dashboard.models.Session`): A valid QC dashboard + scan session. + """ + if not self.date: + logger.debug(f"No scan date found for {str(self.ident)}, " + "leaving blank.") + return + + try: + date = datetime.strptime(self.date, '%Y-%m-%d') + except ValueError: + logger.error(f"Invalid scan date {self.date} for session " + f"{str(self.ident)}") + return + + if date == session.date: + return + + session.date = date + session.save() + + def add_tech_notes(self, session): + """Add the path to a scan session's tech notes to the database. + + Args: + session (:obj:`dashboard.models.Session`): A valid QC dashboard + scan session. 
+ """ + notes = find_tech_notes(self.resources_path) + if not notes: + logger.debug(f"No tech notes found in {self.resources_path}") + return + + # Store only the path relative to the resources dir + session.tech_notes = notes.replace( + self.study_resource_path, "").lstrip("/") + session.save() + + def make_scan(self, file_stem): + """Add a single scan to datman's QC dashboard. + + Args: + file_stem (:obj:`str`): A valid datman-style file name. + """ + logger.debug(f"Adding scan {file_stem} to dashboard.") + try: + scan = datman.dashboard.get_scan(file_stem, create=True) + except datman.dashboard.DashboardException as exc: + logger.error(f"Failed adding scan {file_stem} to dashboard " + f"with error: {exc}") + return + if self.experiment.is_shared(): + source_session = self._get_source_session() + self._make_linked(scan, source_session) + self._add_bids_scan_name(scan, file_stem) + self._add_side_car(scan, file_stem) + self._update_conversion_errors(scan, file_stem) + + def _make_linked(self, scan, source_session): + try: + source_session = datman.dashboard.get_session(source_session) + except datman.dashboard.DashboardException as exc: + logger.error( + f"Failed to link shared scan {scan} to source " + f"{source_session}. Reason - {exc}" + ) + return + matches = [ + source_scan for source_scan in source_session.scans + if (source_scan.series == scan.series and + source_scan.tag == scan.tag) + ] + if not matches or len(matches) > 1: + logger.error( + f"Failed to link shared scan {scan} to {source_session}." + " Reason - Unable to find source scan database record." 
+ ) + return + + scan.source_id = matches[0].id + scan.save() + + def _get_source_session(self): + """Get the ID of the source experiment for a shared XNATExperiment.""" + try: + config = datman.config.config(study=self.experiment.source_name) + except ConfigException: + return self.experiment.source_name + + try: + id_map = config.get_key('IdMap') + except UndefinedSetting: + return self.experiment.source_name + + return str(parse(self.experiment.source_name, id_map)) + + def _add_bids_scan_name(self, scan, dm_stem): + """Add a bids format file name to a series in the QC database. + + Args: + scan (:obj:`dashboard.models.Scan`): A QC dashboard scan. + dm_stem (:obj:`str`): A valid bids format scan name, or an + empty string if the update should be skipped. + """ + bids_stem = self.names[dm_stem] + if not bids_stem: + return + + try: + bids_ident = parse_bids_filename(bids_stem) + except ParseException: + logger.debug(f"Failed to parse bids file name {bids_stem}") + return + scan.add_bids(str(bids_ident)) + + def _add_side_car(self, scan, file_stem): + """Add the JSON side car contents to the QC database. + + Args: + scan (:obj:`dashboard.models.Scan`): A QC dashboard scan. + file_stem (:obj:`str`): A valid datman-style file name. Used to + find the json side car file. + """ + nii_file = self._get_file(file_stem, ".nii.gz") + if not nii_file: + # File exists on xnat but hasnt been generated. + return + + side_car = self._get_file(file_stem, ".json") + if not side_car: + logger.error(f"Missing json side car for {file_stem}") + return + + try: + scan.add_json(side_car) + except Exception as exc: + logger.error("Failed to add JSON side car to dashboard " + f"record for {side_car}. Reason - {exc}") + + def _update_conversion_errors(self, scan, file_stem): + """Add any dcm2niix conversion errors to the QC database. + + Args: + scan (:obj:`dashboard.models.Scan`): A QC dashboard scan. + file_stem (:obj:`str`): A valid datman style file name. 
Used to + find the conversion error file (if one exists). + """ + convert_errors = self._get_file(file_stem, ".err") + if not convert_errors: + if scan.conv_errors: + # Erase the error message from the DB, because it + # has been resolved. + scan.add_error(None) + return + message = self._read_file(convert_errors) + scan.add_error(message) + + def _get_file(self, fname, ext): + """Find a file on the file system. + + Args: + fname (:obj:`str`): A file name (minus extension). + ext (:obj:`str`): A file extension. + + Returns: + str: The full path to the file matching the given name and + extension, otherwise None. + """ + found = os.path.join(self.nii_path, fname + ext) + if not os.path.exists(found): + bl_found = os.path.join(self.nii_path, 'blacklisted', fname + ext) + if os.path.exists(bl_found): + return bl_found + logger.debug(f"File not found {found}") + return None + return found + + def _read_file(self, fpath): + """Read the contents of a file. + + Args: + fpath (:obj:`str`): The full path to a file. + + Returns: + str: The contents of the file or None if the file cannot be read. 
+ """ + try: + with open(fpath, "r") as file_handle: + message = file_handle.readlines() + except Exception as exc: + logger.debug(f"Can't read file {fpath} - {exc}") + return None + return message + + def errors_outdated(self, scan, fname): + err_file = self._get_file(fname, ".err") + if not err_file and scan.conv_errors: + # Error is resolved, but still appears in database + return True + if err_file and not scan.conv_errors: + # Error has appeared, but isnt recorded in database + return True + if err_file and scan.conv_errors: + # Error exists in both locations, but may have changed + message = self._read_file(err_file) + if isinstance(message, list): + message = "\n".join(message) + return message != scan.conv_errors + return False diff --git a/datman/exporters/legacy.py b/datman/exporters/legacy.py new file mode 100644 index 00000000..977e64f0 --- /dev/null +++ b/datman/exporters/legacy.py @@ -0,0 +1,225 @@ +"""Classes for the old-style datman exporters. + +These classes allow a single scan to be exported to various file formats with +the datman naming scheme. They were datman's only export methods prior +to 2020ish, but have been phased out in favor of using exporters that use +the bids format. +""" +from glob import glob +import logging +import os +import re + +import pydicom as dicom + +from .base import SeriesExporter +from datman.utils import run, make_temp_directory, get_extension + +logger = logging.getLogger(__name__) + +__all__ = ["NiiExporter", "DcmExporter"] + + +class NiiExporter(SeriesExporter): + """Export a series to nifti format with datman-style names. 
+ """ + + ext = ".nii.gz" + + type = "nii" + + def export(self, raw_data_dir, **kwargs): + if self.dry_run: + logger.info(f"Dry run: Skipping export of {self.fname_root}") + return + + if self.outputs_exist(): + logger.debug(f"Outputs exist for {self.fname_root}, skipping.") + return + + self.make_output_dir() + + with make_temp_directory(prefix="export_nifti_") as tmp: + _, log_msgs = run(f'dcm2niix -z y -b y -o {tmp} {raw_data_dir}', + self.dry_run) + for tmp_file in glob(f"{tmp}/*"): + self.move_file(tmp_file) + stem = self._get_fname(tmp_file) + self.report_issues(stem, str(log_msgs)) + + def move_file(self, gen_file): + """Move the temp outputs of dcm2niix to the intended output directory. + + Args: + gen_file (:obj:`str`): The full path to the generated nifti file + to move. + """ + fname = self._get_fname(gen_file) + + if not fname: + return + + out_file = os.path.join(self.output_dir, fname) + if os.path.exists(out_file): + logger.info(f"Output {out_file} already exists. Skipping.") + return + + return_code, _ = run(f"mv {gen_file} {out_file}", self.dry_run) + if return_code: + logger.debug(f"Moving dcm2niix output {gen_file} to {out_file} " + "has failed.") + + def _get_fname(self, gen_file): + """Get the intended datman-style name for a generated file. + + Args: + gen_file (:obj:`str`): The full path to the generated nifti file + to move. + + Result: + str: A string filename (with extension) or an empty string. + """ + ext = get_extension(gen_file) + bname = os.path.basename(gen_file) + + if self.echo_dict: + stem = self._get_echo_fname(bname, ext) + if stem != self.fname_root: + # File belongs to the wrong echo, skip it + return "" + else: + stem = self.fname_root + return stem + ext + + def _get_echo_fname(self, fname, ext): + """Get a valid datman-style file name from a multiecho file. + + Args: + fname (:obj:`str`): A filename to parse for an echo number. + ext (:obj:`str`): The file extension to use. 
+ + Returns: + str: A valid datman-style file name or an empty string if one + cannot be made. + """ + # Match a 14 digit timestamp and 1-3 digit series num + regex = "files_(.*)_([0-9]{14})_([0-9]{1,3})(.*)?" + ext + match = re.search(regex, fname) + + if not match: + logger.error(f"Can't parse valid echo number from {fname}.") + return "" + + try: + echo = int(match.group(4).split('e')[-1][0]) + stem = self.echo_dict[echo] + except Exception: + logger.error(f"Can't parse valid echo number from {fname}") + return "" + + return stem + + def report_issues(self, stem, messages): + """Write an error log if dcm2niix had errors during conversion. + + Args: + stem (:obj:`stem`): A valid datman-style file name (minus + extension). + messages (:obj:`str`): Error messages to write. + """ + if self.dry_run: + logger.info(f"DRYRUN - Skipping write of error log for {stem}") + return + + if 'missing images' not in messages: + # The only issue we care about currently is if files are missing + return + + dest = os.path.join(self.output_dir, stem) + ".err" + self._write_error_log(dest, messages) + + def _write_error_log(self, dest, messages): + """Write an error message to the file system. + + Args: + dest (:obj:`str`): The full path of the file to write. + messages (:obj:`str`): Intended contents of the error log. + """ + try: + with open(dest, "w") as output: + output.write(messages) + except Exception as exc: + logger.error(f"Failed writing dcm2niix errors to {dest}. " + f"Reason - {type(exc).__name__} {exc} ") + + +class DcmExporter(SeriesExporter): + """Export a single dicom from a scan. 
+ """ + + type = "dcm" + ext = ".dcm" + + def export(self, raw_data_dir, **kwargs): + self.make_output_dir() + + if self.echo_dict: + self._export_multi_echo(raw_data_dir) + return + + dcm_file = self._find_dcm(raw_data_dir) + if not dcm_file: + logger.error(f"No dicom files found in {raw_data_dir}") + return + + logger.debug(f"Exporting a dcm file from {raw_data_dir} to " + f"{self.output_dir}") + output = os.path.join(self.output_dir, self.fname_root + self.ext) + run(f"cp {dcm_file} {output}", self.dry_run) + + def _find_dcm(self, raw_data_dir): + """Find the path to a valid dicom in the given directory. + + Args: + raw_data_dir (:obj:`str`): The full path to the directory where + raw dicoms were downloaded for the series. + + Returns: + str: the full path to the first readable dicom found. + """ + for path in glob(f"{raw_data_dir}/*"): + try: + dicom.read_file(path) + except dicom.filereader.InvalidDicomError: + pass + else: + return path + return "" + + def _export_multi_echo(self, raw_data_dir): + """Find a single valid dicom for each echo in a multiecho scan. + + Args: + raw_data_dir (:obj:`str`): The full path to the directory where + raw dicoms were downloaded for the series. 
+ """ + dcm_dict = {} + for path in glob(f"{raw_data_dir}/*"): + try: + dcm_file = dicom.read_file(path) + except dicom.filereader.InvalidDicomError: + continue + dcm_echo_num = dcm_file.EchoNumbers + if dcm_echo_num not in dcm_dict: + dcm_dict[int(dcm_echo_num)] = path + if len(dcm_dict) == len(self.echo_dict): + break + + for echo_num, dcm_echo_num in zip(self.echo_dict.keys(), + dcm_dict.keys()): + output_file = os.path.join(self.output_dir, + self.echo_dict[echo_num] + self.ext) + logger.debug(f"Exporting a dcm file from {raw_data_dir} to " + f"{output_file}") + cmd = f"cp {dcm_dict[dcm_echo_num]} {output_file}" + run(cmd, self.dry_run) From b73e2c88bf9030f9143fc05fc6551653d5794d65 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Mon, 28 Jul 2025 18:03:07 -0400 Subject: [PATCH 37/45] [REF] Move BidsOptions, accept arbitrary dcm2bids options - BidsOptions class is now specific to each bids exporter module - dm_xnat_extract will more flexibly handle options for dcm2bids and other future wrapped tools. --- bin/dm_xnat_extract.py | 108 +++++++++++++-------------- datman/exporters/bids.py | 126 ++++++++++++++++++++++++++++++++ datman/exporters/bids_legacy.py | 53 +++++++++++++- 3 files changed, 228 insertions(+), 59 deletions(-) create mode 100644 datman/exporters/bids.py diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py index 95e5cd45..9ed2ad9d 100755 --- a/bin/dm_xnat_extract.py +++ b/bin/dm_xnat_extract.py @@ -59,74 +59,26 @@ import datman.scanid import datman.xnat from datman.utils import (validate_subject_id, define_folder, - make_temp_directory, locate_metadata, read_blacklist) + make_temp_directory, read_blacklist) logger = logging.getLogger(os.path.basename(__file__)) -class BidsOptions: - """Helper class for options related to exporting to BIDS format. 
- """ - - def __init__(self, config, keep_dcm=False, bids_out=None, - force_dcm2niix=False, clobber=False, dcm2bids_config=None, - log_level="INFO", refresh=False): - self.keep_dcm = keep_dcm - self.force_dcm2niix = force_dcm2niix - self.clobber = clobber - self.refresh = refresh - self.bids_out = bids_out - self.log_level = log_level - self.dcm2bids_config = self.get_bids_config( - config, bids_conf=dcm2bids_config) - - def get_bids_config(self, config, bids_conf=None): - """Find the path to a valid dcm2bids config file. - - Args: - config (:obj:`datman.config.config`): The datman configuration. - bids_conf (:obj:`str`, optional): The user provided path to - the config file. Defaults to None. - - Raises: - datman.exceptions.MetadataException if a valid file cannot - be found. - - Returns: - str: The full path to a dcm2bids config file. - """ - if bids_conf: - path = bids_conf - else: - try: - path = locate_metadata("dcm2bids.json", config=config) - except FileNotFoundError as exc: - raise datman.exceptions.MetadataException( - "No dcm2bids.json config file available for " - f"{config.study_name}") from exc - - if not os.path.exists(path): - raise datman.exceptions.MetadataException( - "No dcm2bids.json settings provided.") - - return path - - def main(): - args = read_args() + args, tool_opts = read_args() log_level = get_log_level(args) configure_logging(args.study, log_level) if args.use_dcm2bids and not datman.exporters.DCM2BIDS_FOUND: - logger.error("Failed to import Dcm2Bids. Ensure that " + logger.error("Failed to locate Dcm2Bids. Ensure that " "Dcm2Bids is installed when using the " "--use-dcm2bids flag. 
Exiting conversion") return config = datman.config.config(study=args.study) if args.use_dcm2bids: - bids_opts = BidsOptions( + bids_opts = datman.exporters.BidsOptions( config, keep_dcm=args.keep_dcm, force_dcm2niix=args.force_dcm2niix, @@ -134,7 +86,8 @@ def main(): dcm2bids_config=args.dcm_config, bids_out=args.bids_out, log_level=log_level, - refresh=args.refresh + refresh=args.refresh, + extra_opts=tool_opts.get('--dcm2bids-') ) else: bids_opts = None @@ -236,7 +189,12 @@ def _is_file(path, parser): ) g_dcm2bids = parser.add_argument_group( - "Options for using dcm2bids" + "Options for using dcm2bids. Note that you can feed options directly " + "to dcm2bids by prefixing any with '--dcm2bids-'. For example, the " + "dcm2bids option 'auto-extract-entities' can be used with " + "'--dcm2bids-auto-extract-entities'. Note that the spelling and case " + "must match exactly what dcm2bids expects to receive and must exist " + "for the version of dcm2bids in use" ) g_dcm2bids.add_argument( "--bids-out", action="store", metavar="DIR", @@ -289,14 +247,50 @@ def _is_file(path, parser): help="Do nothing" ) - args = parser.parse_args() + tool_opts, clean_args = parse_tool_opts(sys.argv[1:], ['--dcm2bids-']) + args = parser.parse_args(clean_args) bids_opts = [args.keep_dcm, args.dcm_config, args.bids_out, args.force_dcm2niix, args.clobber, args.refresh] - if not args.use_dcm2bids and any(bids_opts): + if not args.use_dcm2bids and (any(bids_opts) or + '--dcm2bids-' in tool_opts): parser.error("dcm2bids configuration requires --use-dcm2bids") - return args + return args, tool_opts + + +def parse_tool_opts( + args: list[str], + accepted_prefixes: list[str] + ) -> tuple[dict[str, list[str]], list[str]]: + """Collect user options intended for wrapped tools. + + Args: + args (list[str]): A list of string inputs to process. + accepted_prefixes (list[str]): a list of prefixes for options that + will be accepted. 
+ + Returns: + tuple[dict[str, list[str]], list[str]]: + A tuple containing: + - A dictionary mapping an accepted prefix and arguments + associated with it. + - A list of all arguments the user provided that do not match + an accepted prefix. + """ + extra_opts = {} + clean_args = [] + for arg in args: + found = False + for prefix in accepted_prefixes: + if arg.startswith(prefix): + found = True + opt = arg[len(prefix):] + # _, opt = arg.split(prefix) + extra_opts.setdefault(prefix, []).append(opt) + if not found: + clean_args.append(arg) + return extra_opts, clean_args def get_log_level(args): diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py new file mode 100644 index 00000000..5e6dc900 --- /dev/null +++ b/datman/exporters/bids.py @@ -0,0 +1,126 @@ +"""Export to bids format when using containerized dcm2bids (or versions >=3) +""" +import os +import logging +from dataclasses import dataclass +from pathlib import Path + +import datman.config +from .base import SessionExporter +from datman.exceptions import MetadataException +from datman.utils import locate_metadata + +logger = logging.getLogger(__name__) + +__all__ = ["BidsExporter", "BidsOptions"] + + +@dataclass +class BidsOptions: + """Helper class for options related to exporting to BIDS format. + """ + dm_config: datman.config.config + keep_dcm: bool = False + bids_out: str | None = None + force_dcm2niix: bool = False + clobber: bool = False + dcm2bids_config: str | None = None + log_level: str = "INFO" + refresh: bool = False + extra_opts: list = None + + def __post_init__(self): + self.dcm2bids_config = self.get_bids_config( + self.dm_config, + bids_conf=self.dcm2bids_config + ) + + def get_bids_config(self, config: datman.config.config, + bids_conf: str | None = None) -> str: + """Find the path to a valid dcm2bids config file. + + Args: + config (:obj:`datman.config.config`): The datman configuration. + bids_conf (:obj:`str`, optional): The user provided path to + the config file. 
Defaults to None. + + Raises: + datman.exceptions.MetadataException if a valid file cannot + be found. + + Returns: + str: The full path to a dcm2bids config file. + """ + if bids_conf: + path = bids_conf + else: + try: + path = locate_metadata("dcm2bids.json", config=config) + except FileNotFoundError as exc: + raise MetadataException( + "No dcm2bids.json config file available for " + f"{config.study_name}") from exc + + if not os.path.exists(path): + raise MetadataException("No dcm2bids.json settings provided.") + + return path + + +class BidsExporter(SessionExporter): + + type = "bids" + + def __init__(self, config, session, experiment, bids_opts=None, **kwargs): + self.dcm_dir = experiment.dcm_subdir + self.bids_sub = session._ident.get_bids_name() + self.bids_ses = session._ident.timepoint + self.repeat = session._ident.session + self.bids_folder = session.bids_root + self.bids_tmp = os.path.join(session.bids_root, "tmp_dcm2bids", + f"{session.bids_sub}_{session.bids_ses}") + self.output_dir = session.bids_path + self.keep_dcm = bids_opts.keep_dcm if bids_opts else False + self.force_dcm2niix = bids_opts.force_dcm2niix if bids_opts else False + self.clobber = bids_opts.clobber if bids_opts else False + self.log_level = bids_opts.log_level if bids_opts else "INFO" + self.dcm2bids_config = bids_opts.dcm2bids_config if bids_opts else None + self.refresh = bids_opts.refresh if bids_opts else False + + # Can be removed if dcm2bids patches the log issue + self.set_log_level() + + super().__init__(config, session, experiment, **kwargs) + return + + +class NiiLinkExporter(SessionExporter): + + type = "nii_link" + ext = ".nii.gz" + + def __init__(self, config, session, experiment, **kwargs): + return + + def get_dm_names(self): + """Get the datman-style scan names for an entire XNAT experiment. + + Returns: + :obj:`dict`: A dict of series numbers matched to a list of + datman-style names for all scans found for the session on XNAT. 
+ """ + # Difference number 1: This will return every series, even + # the ones that don't get assigned a name in the traditional + names = {} + for scan in self.experiment.scans: + try: + series = int(scan.series) + except ValueError: + # XNAT sometimes adds a string when it finds duplicate series + # numbers. This is an error that should be resolved on the + # server so these instances are safe to ignore. + continue + names.setdefault(series, []).extend(scan.names) + return names + + # def get_bids_names(self): diff --git a/datman/exporters/bids_legacy.py b/datman/exporters/bids_legacy.py index ab9d06d1..1b6a5e86 100644 --- a/datman/exporters/bids_legacy.py +++ b/datman/exporters/bids_legacy.py @@ -17,9 +17,11 @@ import os import re +from datman.exceptions import MetadataException from datman.scanid import make_filename from datman.utils import (splitext, get_extension, write_json, read_json, - filter_niftis, read_blacklist, get_relative_source) + filter_niftis, read_blacklist, get_relative_source, + locate_metadata) from dcm2bids import dcm2bids, Dcm2bids from dcm2bids.sidecar import Acquisition @@ -28,7 +30,54 @@ logger = logging.getLogger(__name__) -__all__ = ["BidsExporter", "NiiLinkExporter"] +__all__ = ["BidsExporter", "NiiLinkExporter", "BidsOptions"] + + +class BidsOptions: + """Helper class for options related to exporting to BIDS format. + """ + + def __init__(self, config, keep_dcm=False, bids_out=None, + force_dcm2niix=False, clobber=False, dcm2bids_config=None, + log_level="INFO", refresh=False, **kwargs): + self.keep_dcm = keep_dcm + self.force_dcm2niix = force_dcm2niix + self.clobber = clobber + self.refresh = refresh + self.bids_out = bids_out + self.log_level = log_level + self.dcm2bids_config = self.get_bids_config( + config, bids_conf=dcm2bids_config) + + def get_bids_config(self, config, bids_conf=None): + """Find the path to a valid dcm2bids config file. + + Args: + config (:obj:`datman.config.config`): The datman configuration. 
+ bids_conf (:obj:`str`, optional): The user provided path to + the config file. Defaults to None. + + Raises: + datman.exceptions.MetadataException if a valid file cannot + be found. + + Returns: + str: The full path to a dcm2bids config file. + """ + if bids_conf: + path = bids_conf + else: + try: + path = locate_metadata("dcm2bids.json", config=config) + except FileNotFoundError as exc: + raise MetadataException( + "No dcm2bids.json config file available for " + f"{config.study_name}") from exc + + if not os.path.exists(path): + raise MetadataException("No dcm2bids.json settings provided.") + + return path class BidsExporter(SessionExporter): From 4cd539965afcd54528c7bfd0d52890fefae8ffed Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Thu, 31 Jul 2025 21:25:34 -0400 Subject: [PATCH 38/45] [WIP] Add some functionality back to new bids class --- datman/exporters/bids.py | 209 +++++++++++++++++++++++++++++++++++---- 1 file changed, 191 insertions(+), 18 deletions(-) diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py index 5e6dc900..6a1bbf17 100644 --- a/datman/exporters/bids.py +++ b/datman/exporters/bids.py @@ -2,6 +2,7 @@ """ import os import logging +import json from dataclasses import dataclass from pathlib import Path @@ -71,8 +72,8 @@ class BidsExporter(SessionExporter): type = "bids" - def __init__(self, config, session, experiment, bids_opts=None, **kwargs): - self.dcm_dir = experiment.dcm_subdir + def __init__(self, config, session, importer, bids_opts=None, **kwargs): + self.dcm_dir = importer.dcm_subdir self.bids_sub = session._ident.get_bids_name() self.bids_ses = session._ident.timepoint self.repeat = session._ident.session @@ -87,40 +88,212 @@ def __init__(self, config, session, experiment, bids_opts=None, **kwargs): self.dcm2bids_config = bids_opts.dcm2bids_config if bids_opts else None self.refresh = bids_opts.refresh if bids_opts else False - # Can be removed if dcm2bids patches the log issue - self.set_log_level() - - 
super().__init__(config, session, experiment, **kwargs) + super().__init__(config, session, importer, **kwargs) return + def outputs_exist(self): + if self.refresh: + logger.info( + f"Re-comparing existing tmp folder for {self.output_dir}" + "to dcm2bids config to pull missed series." + ) + return False + + if self.clobber: + logger.info( + f"{self.output_dir} will be overwritten due to clobber option." + ) + return False + + out_dir = Path(self.output_dir) + if not out_dir.exists(): + return False + + json_files = out_dir.rglob("*.json") + + + expected_scans = self.get_expected_scans() + actual_scans = self.get_actual_scans() + _, missing = self.check_contents(expected_scans, actual_scans) + if missing: + return False + + return True + + def get_contents(self): + outputs = {} + + + class NiiLinkExporter(SessionExporter): + """Populates a study's nii folder with symlinks pointing to the bids dir. + """ type = "nii_link" ext = ".nii.gz" - def __init__(self, config, session, experiment, **kwargs): - return + def __init__(self, config, session, importer, **kwargs): + self.ident = session._ident + self.output_dir = session.nii_path + self.bids_path = session.bids_path + self.config = config + self.tags = config.get_tags(site=session.site) + + super().__init__(config, session, importer, **kwargs) + + self.dm_names = self.get_dm_names() + + @classmethod + def get_output_dir(cls, session): + return session.nii_path + + def needs_raw_data(self): + return False def get_dm_names(self): """Get the datman-style scan names for an entire XNAT experiment. + This is used to + 1) Ensure the contents of the nii folder matches what may have + been produced with an old-style NiiExporter + 2) To predict if an expected scan didn't extract correctly into + the bids folder. + Returns: - :obj:`dict`: A dict of series numbers matched to a list of - datman-style names for all scans found for the session on XNAT. 
+ dict: A map of each series number to the name (or + names) the series would be exported under. """ - # Difference number 1: This will return every series, even - # the ones that don't get assigned a name in the traditional names = {} for scan in self.experiment.scans: try: - series = int(scan.series) + series_num = int(scan.series) except ValueError: - # XNAT sometimes adds a string when it finds duplicate series - # numbers. This is an error that should be resolved on the - # server so these instances are safe to ignore. + # Ignore xnat scans with non-numeric series numbers. + # These are often of the form MR-XX and result from duplicated + # uploads / errors when merging on xnat. continue - names.setdefault(series, []).extend(scan.names) + names[series_num] = scan.names return names - # def get_bids_names(self): + def get_bids_sidecars(self): + """Get all sidecars from a BIDS session. + + Returns: + :obj:`dict`: A map from the series number to the sidecar(s) that + belong to that series. + """ + sidecars = {} + bids_folder = Path(self.bids_path) + for sidecar in bids_folder.rglob("*.json"): + try: + contents = sidecar.read_text(encoding="utf-8") + except (UnicodeDecodeError, OSError) as e: + logger.debug( + f"Ignoring unreadable json sidecar {sidecar} - {e}" + ) + continue + + try: + data = json.loads(contents) + except (json.JSONDecodeError, TypeError) as e: + logger.debug(f"Ignoring invalid json sidecar {sidecar} - {e}") + continue + + data["path"] = sidecar + + if "SeriesNumber" not in data: + continue + + # Need code later to handle split series (do they always + # prefix series number with "10"?) 
+ # -> For new CALM sessions it doesnt, it just allows them to + # retain the original series number (and duplicates it) + # not sure if this is because of CALM or a change in dcm2niix + # or a change in dcm2bids + try: + series_num = int(data["SeriesNumber"]) + except ValueError: + continue + + sidecars.setdefault(series_num, []).append(data) + + fix_split_series(sidecars) + + return sidecars + + +def get_bids_sidecars(bids_path, repeat): + """Get all sidecars from a BIDS session. + + Returns: + :obj:`dict`: A map from the series number to the sidecar(s) that + belong to that series. + """ + bids_folder = Path(bids_path) + sidecars = {} + + for sidecar in bids_folder.rglob("*.json"): + try: + contents = sidecar.read_text(encoding="utf-8") + except (UnicodeDecodeError, OSError) as e: + logger.debug( + f"Ignoring unreadable json sidecar {sidecar} - {e}" + ) + continue + + try: + data = json.loads(contents) + except (json.JSONDecodeError, TypeError) as e: + logger.debug(f"Ignoring invalid json sidecar {sidecar} - {e}") + continue + + data["path"] = sidecar + + if "SeriesNumber" not in data: + continue + + if "Repeat" not in data: + if repeat == "01": + # Assume sidecar belongs to this session, as there's + # usually only 1 'repeat' anyway + data["Repeat"] = "01" + else: + continue + + if data["Repeat"] != repeat: + continue + + try: + series_num = int(data["SeriesNumber"]) + except ValueError: + continue + + sidecars.setdefault(series_num, []).append(data) + + fix_split_series(sidecars) + + return sidecars + + +def fix_split_series(sidecars): + # Handle legacy dcm2bids/dcm2niix split sessions which recieved a + # "10" prefix to their series numbers (e.g. 
'05' would become '1005' + # for one half of a split fmap) + all_str_series = [str(series).zfill(2) for series in sidecars] + delete = [] + for series in sidecars: + str_series = str(series) + if not str_series.startswith("10"): + continue + if len(str_series) < 4: + continue + trimmed_series = str_series[2:] + if trimmed_series not in all_str_series: + # False alarm, just a weird custom series + continue + sidecars[int(trimmed_series)].extend(sidecars[series]) + delete.append(series) + for series in delete: + del sidecars[series] + return sidecars \ No newline at end of file From 601dabf7e68fb99b344a74fab0b16bb27412076d Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 8 Aug 2025 13:42:52 -0400 Subject: [PATCH 39/45] [ENH] Update the NiiLinkExporter to better locate bids files --- datman/exporters/bids.py | 504 ++++++++++++++++++++++++++++++++------- 1 file changed, 414 insertions(+), 90 deletions(-) diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py index 6a1bbf17..e73043a4 100644 --- a/datman/exporters/bids.py +++ b/datman/exporters/bids.py @@ -3,17 +3,19 @@ import os import logging import json +import re from dataclasses import dataclass from pathlib import Path import datman.config from .base import SessionExporter from datman.exceptions import MetadataException -from datman.utils import locate_metadata +from datman.utils import locate_metadata, read_blacklist, get_relative_source +from datman.scanid import make_filename logger = logging.getLogger(__name__) -__all__ = ["BidsExporter", "BidsOptions"] +__all__ = ["BidsExporter", "NiiLinkExporter", "BidsOptions"] @dataclass @@ -124,8 +126,6 @@ def get_contents(self): outputs = {} - - class NiiLinkExporter(SessionExporter): """Populates a study's nii folder with symlinks pointing to the bids dir. 
""" @@ -137,13 +137,12 @@ def __init__(self, config, session, importer, **kwargs): self.ident = session._ident self.output_dir = session.nii_path self.bids_path = session.bids_path + self.repeat = session.session self.config = config self.tags = config.get_tags(site=session.site) super().__init__(config, session, importer, **kwargs) - self.dm_names = self.get_dm_names() - @classmethod def get_output_dir(cls, session): return session.nii_path @@ -151,6 +150,62 @@ def get_output_dir(cls, session): def needs_raw_data(self): return False + def outputs_exist(self): + sidecars = self.get_bids_sidecars() + name_map = self.make_dm_names(sidecars) + + for dm_name in name_map: + + if read_blacklist(scan=dm_name, config=self.config): + continue + + full_path = os.path.join(self.output_dir, dm_name + self.ext) + if not os.path.exists(full_path): + return False + + return True + + def export(self, *args, **kwargs): + sidecars = self.get_bids_sidecars() + name_map = self.make_dm_names(sidecars) + + if self.dry_run: + logger.info("Dry run: Skipping making nii folder links for " + f"mapping {self.name_map}") + return + + if self.outputs_exist(): + return + + self.make_output_dir() + + for dm_name, bids_name in self.name_map.items(): + self.link_scan(dm_name, bids_name) + + def link_scan(self, dm_name: str, bids_root: Path | str): + """Create a symlink in the datman style that points to a bids file. + + Args: + dm_name (:obj:`str`): A valid datman file name. + bids_root (:obj:`pathlib.Path`): The full path to a bids file + (without an extension). 
+ """ + + if read_blacklist(scan=dm_name, config=self.config): + logger.debug(f"Ignoring blacklisted scan {dm_name}") + return + + base_target = os.path.join(self.output_dir, dm_name) + for source in glob(bids_file + "*"): + ext = get_extension(source) + target = base_target + ext + + if is_broken_link(target): + remove_broken_link(target) + + rel_source = get_relative_source(source, target) + make_link(rel_source, target) + def get_dm_names(self): """Get the datman-style scan names for an entire XNAT experiment. @@ -176,124 +231,393 @@ def get_dm_names(self): names[series_num] = scan.names return names - def get_bids_sidecars(self): - """Get all sidecars from a BIDS session. + def get_bids_sidecars(self) -> dict[int, list]: + """Get all sidecars from the session's BIDS folder. Returns: - :obj:`dict`: A map from the series number to the sidecar(s) that - belong to that series. + :obj:`dict`: A map from the series number to a list of the JSON + sidecar contents that result from that series. """ sidecars = {} bids_folder = Path(self.bids_path) for sidecar in bids_folder.rglob("*.json"): - try: - contents = sidecar.read_text(encoding="utf-8") - except (UnicodeDecodeError, OSError) as e: - logger.debug( - f"Ignoring unreadable json sidecar {sidecar} - {e}" - ) - continue - try: - data = json.loads(contents) - except (json.JSONDecodeError, TypeError) as e: - logger.debug(f"Ignoring invalid json sidecar {sidecar} - {e}") + contents = self.read_sidecar(sidecar) + if not contents: continue - data["path"] = sidecar + if not self.matches_repeat(contents): + continue - if "SeriesNumber" not in data: + if "SeriesNumber" not in contents: + logger.debug( + "Ignoring malformed sidecar file (missing SeriesNumber): " + f"{sidecar}" + ) continue - # Need code later to handle split series (do they always - # prefix series number with "10"?) 
- # -> For new CALM sessions it doesnt, it just allows them to - # retain the original series number (and duplicates it) - # not sure if this is because of CALM or a change in dcm2niix - # or a change in dcm2bids try: - series_num = int(data["SeriesNumber"]) + series_num = int(contents["SeriesNumber"]) except ValueError: + logger.debug( + f"Ignoring non-numeric series number in {sidecar}" + ) continue - sidecars.setdefault(series_num, []).append(data) + sidecars.setdefault(series_num, []).append(contents) - fix_split_series(sidecars) + self.fix_split_series_nums(sidecars) return sidecars + def read_sidecar(self, sidecar: str | Path) -> dict: + """Read the contents of a JSON sidecar file. -def get_bids_sidecars(bids_path, repeat): - """Get all sidecars from a BIDS session. - - Returns: - :obj:`dict`: A map from the series number to the sidecar(s) that - belong to that series. - """ - bids_folder = Path(bids_path) - sidecars = {} + NOTE: This adds the path of the file itself under the key 'Path' + """ + if not isinstance(sidecar, Path): + sidecar = Path(sidecar) - for sidecar in bids_folder.rglob("*.json"): try: contents = sidecar.read_text(encoding="utf-8") except (UnicodeDecodeError, OSError) as e: logger.debug( - f"Ignoring unreadable json sidecar {sidecar} - {e}" + f"Sidecar file is unreadable {sidecar} - {e}" ) - continue + return {} try: data = json.loads(contents) except (json.JSONDecodeError, TypeError) as e: - logger.debug(f"Ignoring invalid json sidecar {sidecar} - {e}") - continue + logger.debug(f"Invalid json sidecar {sidecar} - {e}") + return {} - data["path"] = sidecar + data["Path"] = sidecar - if "SeriesNumber" not in data: - continue + return data - if "Repeat" not in data: - if repeat == "01": - # Assume sidecar belongs to this session, as there's - # usually only 1 'repeat' anyway - data["Repeat"] = "01" - else: + def matches_repeat(self, sidecar: dict) -> bool: + """Check if a sidecar matches the current session's 'repeat'. 
+ + The 'repeat' number is used to track when a scan session was stopped + and restarted during a visit. Most of the time it will be '01'. + """ + if "Repeat" not in sidecar: + # If this session is the first 'repeat' it's safe to assume an + # untagged sidecar belongs to it, since usually there's only one + # 'repeat' anyway. + return self.repeat == "01" + return sidecar["Repeat"] == self.repeat + + def fix_split_series_nums(self, sidecars: dict[int, list] + ) -> dict[int, list]: + """Attempt to correct series nums that have been prefixed with '10'. + + Some older versions of dcm2niix/dcm2bids liked to prefix half of a + split series' number with '10' rather than allowing all sidecars + to share the original series num. This attempts to identify when + that has happened and find the original series number for these + files. + """ + all_series = [str(series).zfill(2) for series in sidecars] + must_delete = [] + + for series in sidecars: + str_series = str(series) + + if not str_series.startswith("10"): + continue + + if len(str_series) < 4: continue - if data["Repeat"] != repeat: - continue + trimmed_series = str_series[2:] + if trimmed_series not in all_series: + # False alarm, probably not a mutated series number + continue - try: - series_num = int(data["SeriesNumber"]) - except ValueError: - continue - - sidecars.setdefault(series_num, []).append(data) - - fix_split_series(sidecars) - - return sidecars - - -def fix_split_series(sidecars): - # Handle legacy dcm2bids/dcm2niix split sessions which recieved a - # "10" prefix to their series numbers (e.g. 
'05' would become '1005' - # for one half of a split fmap) - all_str_series = [str(series).zfill(2) for series in sidecars] - delete = [] - for series in sidecars: - str_series = str(series) - if not str_series.startswith("10"): - continue - if len(str_series) < 4: - continue - trimmed_series = str_series[2:] - if trimmed_series not in all_str_series: - # False alarm, just a weird custom series - continue - sidecars[int(trimmed_series)].extend(sidecars[series]) - delete.append(series) - for series in delete: - del sidecars[series] - return sidecars \ No newline at end of file + sidecars[int(trimmed_series)].extend(sidecars[series]) + must_delete.append(series) + + for series in must_delete: + del sidecars[series] + + return sidecars + + def make_dm_names(self, sidecars: dict[int, list]) -> dict[str, Path]: + """Create a datman-style name for each identifiable sidecar. + + Args: + sidecars (`dict`): A dictionary mapping series numbers to a list + of bids sidecar files generated by that series. + + Returns: + dict: a dictionary mapping a datman-style filename to the bids + sidecar path (minus extension) it belongs to. + """ + found_names = {} + reqs = self.get_tag_requirements() + for series in sidecars: + + temp_names = {} + for item in sidecars[series]: + + found = self.find_tag(item, reqs) + + if not found: + logger.debug(f"No tag matches {item['Path']}, ignoring.") + continue + + if len(found) > 1: + logger.error( + f"Multiple tags ({found}) match sidecar " + f"{item['Path']}. Ignoring it. Please update " + "configuration so at most one tag matches." + ) + continue + + dm_name = make_filename( + self.ident, + found[0], + series, + item["SeriesDescription"] + ) + + temp_names.setdefault(dm_name, []).append(item) + + found_names = self.handle_duplicate_names(found_names, temp_names) + + return found_names + + def get_tag_requirements(self) -> dict[str, dict]: + """Read and reformat user configuration for all tags. 
+ + As described in datman's configuration documentation, at a minimum each + tag must define a 'SeriesDescription' regular expression. Tags + may optionally include a 'Bids' section, alongside datman's + 'Pattern' and 'Count' fields for a tag to make it more restrictive or + accurate. + + If included, the 'Bids' section should contain a list of sidecar field + names to check when determining if a tag can by applied. These must + match the sidecars fields verbatim (case-sensitive). Each field name + may then point to either: + + - a literal string to be matched + - a dictionary of settings + + The dictionary of settings may include the following keys: + + - **Pattern** (`str` or list, optional): May be a literal string or a + regular expression in Python format (e.g., use `.*` not `*`), or a + list of literal strings. Optional if `Exclude` is given. If omitted + and `Exclude` is used, the presence of the field name alone + excludes a sidecar from taking the tag. + - **Regex** (`bool`, optional): Indicates whether `Pattern` is a regex + or a string literal. Default is `False`. + - **Exclude** (`bool`, optional): Indicates whether to exclude sidecars + that match the pattern (i.e., take the inverse). Default is `False`. + + Examples: + Below are some YAML examples of commonly used configuration. + + Prevent any sidecar with an 'IntendedFor' field from matching + a tag: + + Bids: + IntendedFor: + Exclude: True + + Match a sidecar only if the PhaseEncodingDirection is exactly 'j': + + Bids: + PhaseEncodingDirection: 'j' + + Match a sidecar only if the ImageType contains 'DERIVED': + + Bids: + ImageType: + Pattern: 'DERIVED' + Regex: True + + Returns: + A dictionary mapping each tag name to the requirements that + must be met for a tag to be applied to a BIDs sidecar. + """ + reqs = {} + for tag in self.tags: + + conf = self.tags.get(tag) + + if is_malformed(conf): + logger.error( + f"Ignoring tag {tag} - Incorrectly configured. 
Each tag " + "must contain a 'Pattern' section and each 'Pattern', at " + "a minimum, must contain a 'SeriesDescription'. Consult " + "the docs for more info.") + continue + + regex = conf["Pattern"]["SeriesDescription"] + if isinstance(regex, list): + regex = "|".join(regex) + + tag_reqs = { + "SeriesDescription": { + "Pattern": regex, + "Regex": True, + "Exclude": False + } + } + + bids_conf = conf.get("Bids", {}) + for field in bids_conf: + # Ensure consistent formatting for settings + if isinstance(bids_conf[field], str): + pattern = bids_conf[field] + regex = False + exclude = False + else: + pattern = bids_conf[field].get("Pattern", "") + if isinstance(pattern, list): + pattern = str(pattern) + regex = bids_conf[field].get("Regex", False) + exclude = bids_conf[field].get("Exclude", False) + + tag_reqs[field] = { + "Pattern": pattern, + "Regex": regex, + "Exclude": exclude + } + + reqs[tag] = tag_reqs + return reqs + + def find_tag(self, + sidecar: dict, + requirements: dict | None = None) -> list: + """Find which configured tags, if any, can be applied to a sidecar. + + Args: + sidecar (`dict`): The contents of a json sidecar. + requirements (`dict`, optional): The requirements to match + each accepted tag. Default is 'None', in which case the + default datman configuration will be consulted. + + Returns: + A list of tag names that the sidecar matches. 
+ """ + if not requirements: + requirements = self.get_tag_requirements() + + found = [] + for tag in requirements: + + match = True + for field in requirements[tag]: + pattern = requirements[tag][field].get("Pattern", "") + is_regex = requirements[tag][field].get("Regex", False) + exclude = requirements[tag][field].get("Exclude", False) + + if field not in sidecar: + if not exclude: + # Absence of an expected field fails tag match + match = False + continue + + if exclude and not pattern: + # Excluded field is in sidecar, so doesnt match tag + match = False + continue + + actual = sidecar[field] + if isinstance(actual, list): + actual = str(actual) + + if is_regex: + comparator = re.search + else: + comparator = re.fullmatch + + if not comparator(pattern, actual, re.IGNORECASE): + match = False + elif exclude: + # Tag does match, but settings indicate to take inverse + match = False + if match: + found.append(tag) + + return found + + def handle_duplicate_names(self, + existing_names: dict[str, str], + new_entries: dict[str, dict] + ) -> dict[str, str]: + """Make duplicated names unique. + + Sometimes, as with multi-echo scans, multiple BIDs files will create + the same datman name. This ensures a unique name exists for each. + + Args: + existing_names (`dict`): The dictionary to add the fixed name + entries to. + new_entries (`dict`): New entries that may contain duplicated + datman-style names. + + Returns: + dict[str, str]: The existing_names dictionary with all + new entries merged in with unique names. + """ + for name in new_entries: + + if len(new_entries[name]) == 1: + existing_names[name] = remove_extension( + new_entries[name][0]["Path"] + ) + continue + + for sidecar in new_entries[name]: + if "EchoNumber" not in sidecar: + logger.error( + "Multiple BIDs files result in same file name " + f"'{name}'. 
Please update configuration to help " + f"identify file: {sidecar['Path']}" + ) + continue + new_name = name + f"_ECHO-{sidecar['EchoNumber']}" + existing_names[new_name] = remove_extension(sidecar['Path']) + + return existing_names + +def is_malformed(config: dict) -> bool: + """Check if a tag's configuration is unusably malformed. + """ + if "Pattern" not in config: + return True + if "SeriesDescription" not in config["Pattern"]: + return True + return False + +def remove_extension(path: Path) -> Path: + """Remove all extensions from a path. + """ + while path.suffix: + path = path.with_suffix("") + return path + +def is_broken_link(symlink: str) -> bool: + return os.path.islink(symlink) and not os.path.exists(symlink) + +def remove_broken_link(target: str): + try: + os.unlink(target) + except OSError as e: + logger.error(f"Failed to remove broken symlink {target} - {e}") + return + +def make_link(source: str, target: str): + try: + os.symlink(source, target) + except FileExistsError: + pass + except OSError as e: + logger.error(f"Failed to create {target} - {e}") \ No newline at end of file From b7fe7af4f67f2d919b3ff1929aaef59380ce2c64 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Tue, 12 Aug 2025 16:04:42 -0400 Subject: [PATCH 40/45] [FIX] Make NiiLinkExporter less spammy, handle ints in config --- datman/exporters/bids.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py index e73043a4..fec7d39b 100644 --- a/datman/exporters/bids.py +++ b/datman/exporters/bids.py @@ -369,7 +369,7 @@ def make_dm_names(self, sidecars: dict[int, list]) -> dict[str, Path]: continue if len(found) > 1: - logger.error( + logger.debug( f"Multiple tags ({found}) match sidecar " f"{item['Path']}. Ignoring it. Please update " "configuration so at most one tag matches." 
@@ -472,13 +472,13 @@ def get_tag_requirements(self) -> dict[str, dict]: bids_conf = conf.get("Bids", {}) for field in bids_conf: # Ensure consistent formatting for settings - if isinstance(bids_conf[field], str): - pattern = bids_conf[field] + if isinstance(bids_conf[field], (str, int)): + pattern = str(bids_conf[field]) regex = False exclude = False else: pattern = bids_conf[field].get("Pattern", "") - if isinstance(pattern, list): + if not isinstance(pattern, str): pattern = str(pattern) regex = bids_conf[field].get("Regex", False) exclude = bids_conf[field].get("Exclude", False) @@ -530,7 +530,7 @@ def find_tag(self, continue actual = sidecar[field] - if isinstance(actual, list): + if not isinstance(actual, str): actual = str(actual) if is_regex: From 72a3e0c4412bd36425bab52d85ab96fe4d2c6956 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Tue, 12 Aug 2025 16:55:22 -0400 Subject: [PATCH 41/45] [FIX] Correct typos and missing imports --- datman/exporters/bids.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py index fec7d39b..fb6d7bcc 100644 --- a/datman/exporters/bids.py +++ b/datman/exporters/bids.py @@ -4,13 +4,15 @@ import logging import json import re +from glob import glob from dataclasses import dataclass from pathlib import Path import datman.config from .base import SessionExporter from datman.exceptions import MetadataException -from datman.utils import locate_metadata, read_blacklist, get_relative_source +from datman.utils import (locate_metadata, read_blacklist, get_relative_source, + get_extension) from datman.scanid import make_filename logger = logging.getLogger(__name__) @@ -171,7 +173,7 @@ def export(self, *args, **kwargs): if self.dry_run: logger.info("Dry run: Skipping making nii folder links for " - f"mapping {self.name_map}") + f"mapping {name_map}") return if self.outputs_exist(): @@ -179,7 +181,7 @@ def export(self, *args, **kwargs): self.make_output_dir() - for 
dm_name, bids_name in self.name_map.items(): + for dm_name, bids_name in name_map.items(): self.link_scan(dm_name, bids_name) def link_scan(self, dm_name: str, bids_root: Path | str): @@ -196,7 +198,7 @@ def link_scan(self, dm_name: str, bids_root: Path | str): return base_target = os.path.join(self.output_dir, dm_name) - for source in glob(bids_file + "*"): + for source in glob(str(bids_root) + "*"): ext = get_extension(source) target = base_target + ext From b6502262811b23c26785cc804d9afd1f00ee9689 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Thu, 14 Aug 2025 23:29:52 -0400 Subject: [PATCH 42/45] [FIX] Update bids version selection, re-add bids functionality --- datman/exporters/__init__.py | 47 ++++-- datman/exporters/bids.py | 265 +++++++++++++++++++++++--------- datman/exporters/bids_legacy.py | 6 - 3 files changed, 223 insertions(+), 95 deletions(-) diff --git a/datman/exporters/__init__.py b/datman/exporters/__init__.py index 63c62252..378ec493 100644 --- a/datman/exporters/__init__.py +++ b/datman/exporters/__init__.py @@ -2,7 +2,9 @@ import importlib import pkgutil import logging +from packaging.version import parse +from datman.utils import check_dependency_configured from .base import Exporter, SessionExporter, SeriesExporter logger = logging.getLogger(__name__) @@ -30,6 +32,24 @@ def _load_contents(module_name): __all__.extend(contents) +def is_runnable_container(container): + """Check if a container is able to be run. + """ + try: + check_dependency_configured("apptainer", shell_cmd="apptainer") + except EnvironmentError: + logger.error(f"apptainer is not available, ignoring container.") + return False + + if not os.path.exists(container): + logger.error( + f"Container path does not exist - {container}, ignoring container." 
+ ) + return False + + return True + + # Load everything from exporters folder (except bids exporters) so contents # can be accessed as 'datman.exporters' instead of 'datman.exporters.xxx' for _, module_name, _ in pkgutil.iter_modules([os.path.dirname(__file__)]): @@ -42,25 +62,22 @@ def _load_contents(module_name): if os.getenv("BIDS_CONTAINER"): # Container is in use, load bids.py - _load_contents("bids") - DCM2BIDS_FOUND = True + if is_runnable_container(os.getenv("BIDS_CONTAINER")): + _load_contents("bids") + DCM2BIDS_FOUND = True + else: + logger.error(f"Cannot use dcm2bids container, ignoring bids.") + DCM2BIDS_FOUND = False else: try: - from dcm2bids import dcm2bids, Dcm2bids - except ImportError: - # dcm2bids is either not installed or version >= 3 - try: - import dcm2bids - except ImportError: - # No dcm2bids available at all - DCM2BIDS_FOUND = False + version = importlib.metadata.version("dcm2bids") + except importlib.metadata.PackageNotFoundError: + DCM2BIDS_FOUND = False + else: + if parse(version) < parse("3"): + _load_contents("bids_legacy") else: - # dcm2bids is installed and version > 3, use bids.py _load_contents("bids") - DCM2BIDS_FOUND = True - else: - # dcm2bids is installed and version < 3, use bids_legacy.py - _load_contents("bids_legacy") DCM2BIDS_FOUND = True diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py index fb6d7bcc..7712c7f0 100644 --- a/datman/exporters/bids.py +++ b/datman/exporters/bids.py @@ -12,7 +12,7 @@ from .base import SessionExporter from datman.exceptions import MetadataException from datman.utils import (locate_metadata, read_blacklist, get_relative_source, - get_extension) + get_extension, write_json, run) from datman.scanid import make_filename logger = logging.getLogger(__name__) @@ -85,15 +85,14 @@ def __init__(self, config, session, importer, bids_opts=None, **kwargs): self.bids_tmp = os.path.join(session.bids_root, "tmp_dcm2bids", f"{session.bids_sub}_{session.bids_ses}") self.output_dir = 
session.bids_path - self.keep_dcm = bids_opts.keep_dcm if bids_opts else False - self.force_dcm2niix = bids_opts.force_dcm2niix if bids_opts else False - self.clobber = bids_opts.clobber if bids_opts else False - self.log_level = bids_opts.log_level if bids_opts else "INFO" - self.dcm2bids_config = bids_opts.dcm2bids_config if bids_opts else None - self.refresh = bids_opts.refresh if bids_opts else False + self.refresh = bids_opts.refresh + self.clobber = bids_opts.clobber + self.opts = bids_opts super().__init__(config, session, importer, **kwargs) - return + + def needs_raw_data(self): + return not self.outputs_exist() and not self.refresh def outputs_exist(self): if self.refresh: @@ -109,23 +108,165 @@ def outputs_exist(self): ) return False - out_dir = Path(self.output_dir) - if not out_dir.exists(): + if not os.path.exists(self.output_dir): return False - json_files = out_dir.rglob("*.json") - - - expected_scans = self.get_expected_scans() - actual_scans = self.get_actual_scans() - _, missing = self.check_contents(expected_scans, actual_scans) - if missing: + if not self.session._bids_inventory: return False + # Assume everything exists if anything does :( return True - def get_contents(self): - outputs = {} + def export(self, raw_data_dir, **kwargs): + if self.outputs_exist(): + return + + if self.dry_run: + logger.info(f"Dry run: Skipping bids export to {self.output_dir}") + return + + # Store user settings in case they change during export + orig_force = self.opts.force_dcm2niix + orig_refresh = self.refresh + + # Does this still work for repeats? + if int(self.repeat) > 1: + # Must force dcm2niix export if it's a repeat. + self.force_dcm2niix = True + + self.make_output_dir() + + try: + self.run_dcm2bids(raw_data_dir) + except Exception as e: + logger.error(f"Failed to extract to BIDs - {e}") + + # For CLM CHO / basic format. 
Gotta make sure apptainer exists + # apptainer run \ + # -B ${outputdir} \ + # /scratch/edickie/CLM01_pilots/containers/dcm2bids-3.2.0.sif \ + # -d ${outputdir}/dicoms/CLM01_CHO_00000003_01_SE01_MR/ \ + # -p "sub-CHO00000004" \ + # -s "ses-01" \ + # -c ${outputdir}/dcm2bids_3chorom.json \ + # -o ${outputdir}/bids \ + # --auto_extract_entities + + # Test command. Exporter may need to 'hang on to' the metadata folder + # path and the file name for the dcm2bids.json (since the file given + # can be named anything and shouldn't be assumed) + # Note also: all bound paths must exist before running + # apptainer run -B /scratch/dawn/temp_stuff/new_bids/test_archive/tmp_extract/:/input -B /scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/metadata:/metadata -B /scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids:/output ${BIDS_CONTAINER} -d /input -p "sub-CHO00000003" -s "ses-01" -c /metadata/dcm2bids.json -o /output --auto_extract_entities + + if int(self.repeat) > 1: + # Must run a second time to move the new niftis out of the tmp dir + self.force_dcm2niix = False + self.refresh = True + try: + self.run_dcm2bids(raw_data_dir) + except Exception as e: + logger.error(f"Failed to extract data. {e}") + + self.force_dcm2niix = orig_force + self.refresh = orig_refresh + + try: + self.add_repeat_num() + except (PermissionError, JSONDecodeError): + logger.error( + "Failed to add repeat numbers to sidecars in " + f"{self.output_dir}. If a repeat scan is added, scans may " + "incorrectly be tagged as belonging to the later repeat." 
+ ) + + def run_dcm2bids(self, raw_data_dir): + input_dir = self._get_scan_dir(raw_data_dir) + + if self.refresh and not os.path.exists(input_dir): + logger.error( + f"Cannot refresh contents of {self.output_dir}, no " + f"files found at {input_dir}.") + return + + cmd = self.make_command(input_dir) + return_code, output = run(cmd) + print(return_code) + print(output) + + def _get_scan_dir(self, download_dir): + if self.refresh: + # Use existing tmp_dir instead of raw dcms + return self.bids_tmp + return download_dir + + def make_command(self, raw_data_dir): + # CLM01_CHO_00000003_01_01 + + # ???? is this an issue because I downloaded them? + # dcm_dic = 'scans/9_DTI_HCP_b2400_AP_ADC' + + # bids_sub = 'CHO00000003' + # bids_ses = '01' + # repeat = '01' + # bids_folder = '/scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids/' + # bids_tmp = '/scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids/tmp_dcm2bids/sub-CHO00000003_ses-01' + # output_dir = '/scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids/sub-CHO00000003/ses-01' + + # raw_data_dir = "/scratch/dawn/temp_stuff/new_bids/test_archive/tmp_extract/" + + conf_dir, conf_file = os.path.split(self.opts.dcm2bids_config) + + container_path = os.getenv("BIDS_CONTAINER") + if container_path: + cmd = [ + "apptainer run", + f"-B {raw_data_dir}:/input", + f"-B {conf_dir}:/config", + f"-B {self.bids_folder}:/output", + f"{container_path}", + "-d /input", + f"-c /config/{conf_file}", + "-o /output" + ] + else: + cmd = [ + "dcm2bids", + f"-d {raw_data_dir}", + f"-c {self.opts.dcm2bids_config}", + f"-o {self.bids_folder}" + ] + + cmd.extend([ + f"-p '{self.bids_sub}'", + f"-s '{self.bids_ses}'", + f"-l {self.opts.log_level}" + ]) + + if self.opts.clobber: + cmd.append("--clobber") + + if self.opts.force_dcm2niix: + cmd.append("--forceDcm2niix") + + for item in self.opts.extra_opts: + cmd.append(f"--{item}") + + return cmd + + def add_repeat_num(self): + for sidecar in 
Path(self.output_dir).rglob("*.json"): + + contents = read_sidecar(sidecar) + if not contents: + continue + + if "Repeat" in contents: + continue + + contents["Repeat"] = self.repeat + # Remove "Path" so it doesnt get written to the output file + del contents["Path"] + write_json(sidecar, contents) class NiiLinkExporter(SessionExporter): @@ -208,31 +349,6 @@ def link_scan(self, dm_name: str, bids_root: Path | str): rel_source = get_relative_source(source, target) make_link(rel_source, target) - def get_dm_names(self): - """Get the datman-style scan names for an entire XNAT experiment. - - This is used to - 1) Ensure the contents of the nii folder matches what may have - been produced with an old-style NiiExporter - 2) To predict if an expected scan didn't extract correctly into - the bids folder. - - Returns: - dict: A map of each series number to the name (or - names) the series would be exported under. - """ - names = {} - for scan in self.experiment.scans: - try: - series_num = int(scan.series) - except ValueError: - # Ignore xnat scans with non-numeric series numbers. - # These are often of the form MR-XX and result from duplicated - # uploads / errors when merging on xnat. - continue - names[series_num] = scan.names - return names - def get_bids_sidecars(self) -> dict[int, list]: """Get all sidecars from the session's BIDS folder. @@ -244,7 +360,7 @@ def get_bids_sidecars(self) -> dict[int, list]: bids_folder = Path(self.bids_path) for sidecar in bids_folder.rglob("*.json"): - contents = self.read_sidecar(sidecar) + contents = read_sidecar(sidecar) if not contents: continue @@ -272,32 +388,6 @@ def get_bids_sidecars(self) -> dict[int, list]: return sidecars - def read_sidecar(self, sidecar: str | Path) -> dict: - """Read the contents of a JSON sidecar file. 
- - NOTE: This adds the path of the file itself under the key 'Path' - """ - if not isinstance(sidecar, Path): - sidecar = Path(sidecar) - - try: - contents = sidecar.read_text(encoding="utf-8") - except (UnicodeDecodeError, OSError) as e: - logger.debug( - f"Sidecar file is unreadable {sidecar} - {e}" - ) - return {} - - try: - data = json.loads(contents) - except (json.JSONDecodeError, TypeError) as e: - logger.debug(f"Invalid json sidecar {sidecar} - {e}") - return {} - - data["Path"] = sidecar - - return data - def matches_repeat(self, sidecar: dict) -> bool: """Check if a sidecar matches the current session's 'repeat'. @@ -451,7 +541,7 @@ def get_tag_requirements(self) -> dict[str, dict]: conf = self.tags.get(tag) - if is_malformed(conf): + if is_malformed_conf(conf): logger.error( f"Ignoring tag {tag} - Incorrectly configured. Each tag " "must contain a 'Pattern' section and each 'Pattern', at " @@ -590,7 +680,8 @@ def handle_duplicate_names(self, return existing_names -def is_malformed(config: dict) -> bool: + +def is_malformed_conf(config: dict) -> bool: """Check if a tag's configuration is unusably malformed. """ if "Pattern" not in config: @@ -622,4 +713,30 @@ def make_link(source: str, target: str): except FileExistsError: pass except OSError as e: - logger.error(f"Failed to create {target} - {e}") \ No newline at end of file + logger.error(f"Failed to create {target} - {e}") + +def read_sidecar(sidecar: str | Path) -> dict: + """Read the contents of a JSON sidecar file. 
+ + NOTE: This adds the path of the file itself under the key 'Path' + """ + if not isinstance(sidecar, Path): + sidecar = Path(sidecar) + + try: + contents = sidecar.read_text(encoding="utf-8") + except (UnicodeDecodeError, OSError) as e: + logger.debug( + f"Sidecar file is unreadable {sidecar} - {e}" + ) + return {} + + try: + data = json.loads(contents) + except (json.JSONDecodeError, TypeError) as e: + logger.debug(f"Invalid json sidecar {sidecar} - {e}") + return {} + + data["Path"] = sidecar + + return data \ No newline at end of file diff --git a/datman/exporters/bids_legacy.py b/datman/exporters/bids_legacy.py index 1b6a5e86..74fabc75 100644 --- a/datman/exporters/bids_legacy.py +++ b/datman/exporters/bids_legacy.py @@ -380,12 +380,6 @@ def export(self, raw_data_dir, **kwargs): if self.outputs_exist(): return - # Was this ever needed? The class should never have been made. - # if not DCM2BIDS_FOUND: - # logger.info(f"Unable to export to {self.output_dir}, " - # "Dcm2Bids not found.") - # return - if self.dry_run: logger.info(f"Dry run: Skipping bids export to {self.output_dir}") return From 6d72159318d12a38cf7243700753177befc938c2 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 15 Aug 2025 19:40:37 -0400 Subject: [PATCH 43/45] [FIX] Ensure extra bids opts always defaults to list --- bin/dm_xnat_extract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py index 9ed2ad9d..9d73f436 100755 --- a/bin/dm_xnat_extract.py +++ b/bin/dm_xnat_extract.py @@ -87,7 +87,7 @@ def main(): bids_out=args.bids_out, log_level=log_level, refresh=args.refresh, - extra_opts=tool_opts.get('--dcm2bids-') + extra_opts=tool_opts.get('--dcm2bids-', []) ) else: bids_opts = None From 05f3554e8cc8e75491a6662efff6fb40bb49c1fd Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 15 Aug 2025 21:06:03 -0400 Subject: [PATCH 44/45] [PEP8] Fix style issues --- datman/exporters/__init__.py | 11 ++- datman/exporters/base.py | 4 
+- datman/exporters/bids.py | 145 +++++++++++++++----------------- datman/exporters/bids_legacy.py | 130 ++-------------------------- datman/exporters/dashboard.py | 4 +- datman/exporters/legacy.py | 2 +- 6 files changed, 85 insertions(+), 211 deletions(-) diff --git a/datman/exporters/__init__.py b/datman/exporters/__init__.py index 378ec493..655ed613 100644 --- a/datman/exporters/__init__.py +++ b/datman/exporters/__init__.py @@ -1,7 +1,10 @@ -import os +"""Import classes used to export dicom data to various formats. +""" + import importlib -import pkgutil import logging +import os +import pkgutil from packaging.version import parse from datman.utils import check_dependency_configured @@ -38,7 +41,7 @@ def is_runnable_container(container): try: check_dependency_configured("apptainer", shell_cmd="apptainer") except EnvironmentError: - logger.error(f"apptainer is not available, ignoring container.") + logger.error("apptainer is not available, ignoring container.") return False if not os.path.exists(container): @@ -66,7 +69,7 @@ def is_runnable_container(container): _load_contents("bids") DCM2BIDS_FOUND = True else: - logger.error(f"Cannot use dcm2bids container, ignoring bids.") + logger.error("Cannot use dcm2bids container, ignoring bids.") DCM2BIDS_FOUND = False else: try: diff --git a/datman/exporters/base.py b/datman/exporters/base.py index 6cd81237..af8c4208 100644 --- a/datman/exporters/base.py +++ b/datman/exporters/base.py @@ -6,9 +6,9 @@ class that inherits from either SessionExporter if it must work on an entire scan series at a time. 
""" -from abc import ABC, abstractmethod -import os import logging +import os +from abc import ABC, abstractmethod logger = logging.getLogger(__name__) diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py index 7712c7f0..0f0a7879 100644 --- a/datman/exporters/bids.py +++ b/datman/exporters/bids.py @@ -4,23 +4,23 @@ import logging import json import re +import dataclasses from glob import glob -from dataclasses import dataclass from pathlib import Path import datman.config -from .base import SessionExporter from datman.exceptions import MetadataException from datman.utils import (locate_metadata, read_blacklist, get_relative_source, get_extension, write_json, run) from datman.scanid import make_filename +from .base import SessionExporter logger = logging.getLogger(__name__) __all__ = ["BidsExporter", "NiiLinkExporter", "BidsOptions"] -@dataclass +@dataclasses.dataclass class BidsOptions: """Helper class for options related to exporting to BIDS format. """ @@ -32,7 +32,7 @@ class BidsOptions: dcm2bids_config: str | None = None log_level: str = "INFO" refresh: bool = False - extra_opts: list = None + extra_opts: list = dataclasses.field(default_factory=list) def __post_init__(self): self.dcm2bids_config = self.get_bids_config( @@ -73,10 +73,19 @@ def get_bids_config(self, config: datman.config.config, class BidsExporter(SessionExporter): + """Populates a study's bids folder. 
+ """ type = "bids" - def __init__(self, config, session, importer, bids_opts=None, **kwargs): + def __init__( + self, + config: datman.config.config, + session: 'datman.scan.Scan', + importer: 'datman.importers.SessionImporter', + bids_opts: BidsOptions = None, + **kwargs + ): self.dcm_dir = importer.dcm_subdir self.bids_sub = session._ident.get_bids_name() self.bids_ses = session._ident.timepoint @@ -85,24 +94,22 @@ def __init__(self, config, session, importer, bids_opts=None, **kwargs): self.bids_tmp = os.path.join(session.bids_root, "tmp_dcm2bids", f"{session.bids_sub}_{session.bids_ses}") self.output_dir = session.bids_path - self.refresh = bids_opts.refresh - self.clobber = bids_opts.clobber self.opts = bids_opts super().__init__(config, session, importer, **kwargs) - def needs_raw_data(self): - return not self.outputs_exist() and not self.refresh + def needs_raw_data(self) -> bool: + return not self.outputs_exist() and not self.opts.refresh - def outputs_exist(self): - if self.refresh: + def outputs_exist(self) -> bool: + if self.opts.refresh: logger.info( f"Re-comparing existing tmp folder for {self.output_dir}" "to dcm2bids config to pull missed series." ) return False - if self.clobber: + if self.opts.clobber: logger.info( f"{self.output_dir} will be overwritten due to clobber option." ) @@ -114,10 +121,10 @@ def outputs_exist(self): if not self.session._bids_inventory: return False - # Assume everything exists if anything does :( + # Assume everything exists if anything does return True - def export(self, raw_data_dir, **kwargs): + def export(self, raw_data_dir: str, **kwargs): if self.outputs_exist(): return @@ -125,94 +132,63 @@ def export(self, raw_data_dir, **kwargs): logger.info(f"Dry run: Skipping bids export to {self.output_dir}") return - # Store user settings in case they change during export - orig_force = self.opts.force_dcm2niix - orig_refresh = self.refresh - - # Does this still work for repeats? 
if int(self.repeat) > 1: - # Must force dcm2niix export if it's a repeat. - self.force_dcm2niix = True + # Must force dcm2niix if it's a repeat. + force_dcm2niix = True + else: + force_dcm2niix = self.opts.force_dcm2niix self.make_output_dir() try: - self.run_dcm2bids(raw_data_dir) + self.run_dcm2bids(raw_data_dir, force_dcm2niix=force_dcm2niix) except Exception as e: logger.error(f"Failed to extract to BIDs - {e}") - # For CLM CHO / basic format. Gotta make sure apptainer exists - # apptainer run \ - # -B ${outputdir} \ - # /scratch/edickie/CLM01_pilots/containers/dcm2bids-3.2.0.sif \ - # -d ${outputdir}/dicoms/CLM01_CHO_00000003_01_SE01_MR/ \ - # -p "sub-CHO00000004" \ - # -s "ses-01" \ - # -c ${outputdir}/dcm2bids_3chorom.json \ - # -o ${outputdir}/bids \ - # --auto_extract_entities - - # Test command. Exporter may need to 'hang on to' the metadata folder - # path and the file name for the dcm2bids.json (since the file given - # can be named anything and shouldn't be assumed) - # Note also: all bound paths must exist before running - # apptainer run -B /scratch/dawn/temp_stuff/new_bids/test_archive/tmp_extract/:/input -B /scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/metadata:/metadata -B /scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids:/output ${BIDS_CONTAINER} -d /input -p "sub-CHO00000003" -s "ses-01" -c /metadata/dcm2bids.json -o /output --auto_extract_entities - if int(self.repeat) > 1: # Must run a second time to move the new niftis out of the tmp dir - self.force_dcm2niix = False - self.refresh = True try: - self.run_dcm2bids(raw_data_dir) + self.run_dcm2bids( + raw_data_dir, force_dcm2niix=False, refresh=True + ) except Exception as e: logger.error(f"Failed to extract data. 
{e}") - self.force_dcm2niix = orig_force - self.refresh = orig_refresh - try: self.add_repeat_num() - except (PermissionError, JSONDecodeError): + except (PermissionError, json.JSONDecodeError): logger.error( "Failed to add repeat numbers to sidecars in " f"{self.output_dir}. If a repeat scan is added, scans may " "incorrectly be tagged as belonging to the later repeat." ) - def run_dcm2bids(self, raw_data_dir): - input_dir = self._get_scan_dir(raw_data_dir) + def run_dcm2bids(self, raw_data_dir: str, force_dcm2niix: bool = False, + refresh: bool = False): + input_dir = self._get_scan_dir(raw_data_dir, refresh) - if self.refresh and not os.path.exists(input_dir): + if refresh and not os.path.exists(input_dir): logger.error( f"Cannot refresh contents of {self.output_dir}, no " f"files found at {input_dir}.") return - cmd = self.make_command(input_dir) + cmd = self.make_command(input_dir, force_dcm2niix) return_code, output = run(cmd) - print(return_code) - print(output) + if return_code: + logger.error(f"Failed when running dcm2bids - {output}") - def _get_scan_dir(self, download_dir): - if self.refresh: + def _get_scan_dir(self, download_dir: str, refresh: bool = False) -> str: + if refresh: # Use existing tmp_dir instead of raw dcms return self.bids_tmp return download_dir - def make_command(self, raw_data_dir): - # CLM01_CHO_00000003_01_01 - - # ???? is this an issue because I downloaded them? 
- # dcm_dic = 'scans/9_DTI_HCP_b2400_AP_ADC' - - # bids_sub = 'CHO00000003' - # bids_ses = '01' - # repeat = '01' - # bids_folder = '/scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids/' - # bids_tmp = '/scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids/tmp_dcm2bids/sub-CHO00000003_ses-01' - # output_dir = '/scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids/sub-CHO00000003/ses-01' - - # raw_data_dir = "/scratch/dawn/temp_stuff/new_bids/test_archive/tmp_extract/" + def make_command( + self, raw_data_dir: str, force_dcm2niix: bool = False + ) -> list[str]: + """Construct the dcm2bids command based on on user configuration. + """ conf_dir, conf_file = os.path.split(self.opts.dcm2bids_config) @@ -245,8 +221,8 @@ def make_command(self, raw_data_dir): if self.opts.clobber: cmd.append("--clobber") - if self.opts.force_dcm2niix: - cmd.append("--forceDcm2niix") + if force_dcm2niix: + cmd.append("--force_dcm2bids") for item in self.opts.extra_opts: cmd.append(f"--{item}") @@ -254,6 +230,12 @@ def make_command(self, raw_data_dir): return cmd def add_repeat_num(self): + """Add the sessions 'repeat' number to all of its json sidecars. + + This is used to allow us to track which files belong to which session + when there's more than one (i.e. if there's an 01_02 and so forth + instead of just 01_01) + """ for sidecar in Path(self.output_dir).rglob("*.json"): contents = read_sidecar(sidecar) @@ -401,8 +383,10 @@ def matches_repeat(self, sidecar: dict) -> bool: return self.repeat == "01" return sidecar["Repeat"] == self.repeat - def fix_split_series_nums(self, sidecars: dict[int, list] - ) -> dict[int, list]: + def fix_split_series_nums( + self, + sidecars: dict[int, list] + ) -> dict[int, list]: """Attempt to correct series nums that have been prefixed with '10'. 
Some older versions of dcm2niix/dcm2bids liked to prefix half of a @@ -640,10 +624,11 @@ def find_tag(self, return found - def handle_duplicate_names(self, - existing_names: dict[str, str], - new_entries: dict[str, dict] - ) -> dict[str, str]: + def handle_duplicate_names( + self, + existing_names: dict[str, str], + new_entries: dict[str, dict] + ) -> dict[str, str]: """Make duplicated names unique. Sometimes, as with multi-echo scans, multiple BIDs files will create @@ -690,6 +675,7 @@ def is_malformed_conf(config: dict) -> bool: return True return False + def remove_extension(path: Path) -> Path: """Remove all extensions from a path. """ @@ -697,15 +683,17 @@ def remove_extension(path: Path) -> Path: path = path.with_suffix("") return path + def is_broken_link(symlink: str) -> bool: return os.path.islink(symlink) and not os.path.exists(symlink) + def remove_broken_link(target: str): try: os.unlink(target) except OSError as e: logger.error(f"Failed to remove broken symlink {target} - {e}") - return + def make_link(source: str, target: str): try: @@ -715,6 +703,7 @@ def make_link(source: str, target: str): except OSError as e: logger.error(f"Failed to create {target} - {e}") + def read_sidecar(sidecar: str | Path) -> dict: """Read the contents of a JSON sidecar file. @@ -739,4 +728,4 @@ def read_sidecar(sidecar: str | Path) -> dict: data["Path"] = sidecar - return data \ No newline at end of file + return data diff --git a/datman/exporters/bids_legacy.py b/datman/exporters/bids_legacy.py index 74fabc75..283a24ae 100644 --- a/datman/exporters/bids_legacy.py +++ b/datman/exporters/bids_legacy.py @@ -10,22 +10,21 @@ manual intervention). It can also force dcm2bids to properly export repeat sessions into the same folder, where newer versions will simply ignore them. 
""" -from collections import OrderedDict -from glob import glob -from json import JSONDecodeError import logging import os import re +from collections import OrderedDict +from glob import glob +from json import JSONDecodeError + +from dcm2bids import dcm2bids, Dcm2bids +from dcm2bids.sidecar import Acquisition from datman.exceptions import MetadataException from datman.scanid import make_filename from datman.utils import (splitext, get_extension, write_json, read_json, filter_niftis, read_blacklist, get_relative_source, locate_metadata) - -from dcm2bids import dcm2bids, Dcm2bids -from dcm2bids.sidecar import Acquisition - from .base import SessionExporter logger = logging.getLogger(__name__) @@ -594,123 +593,6 @@ def get_sidecars(self): contents = {path: read_json(path) for path in sidecars} return contents - def find_missing_scans(self): - """Find scans that exist on xnat but are missing from the bids folder. - """ - class FakeSidecar(dcm2bids.Sidecar): - """Turns XNAT series descriptions into pseudo-sidecars. 
- """ - def __init__(self, xnat_scan): - self.scan = xnat_scan - self.data = xnat_scan - self.compKeys = dcm2bids.DEFAULT.compKeys - - # Placeholders for compatibility with dcm2bids.Sidecar - self.root = ( - f"/tmp/{xnat_scan.series}" - + f"_{xnat_scan.description}" - + f"_{xnat_scan.subject}" - ) - self.filename = f"{self.root}.json" - self.data["SidecarFilename"] = self.filename - - @property - def data(self): - return self._data - - @data.setter - def data(self, scan): - self._data = OrderedDict() - self._data['SeriesDescription'] = scan.description - self._data['SeriesNumber'] = scan.series - - def __repr__(self): - return f"" - - def get_expected_names(participant, sidecars, bids_conf): - parser = dcm2bids.SidecarPairing( - sidecars, bids_conf["descriptions"] - ) - parser.build_graph() - parser.build_acquisitions(participant) - parser.find_runs() - return [acq.dstRoot for acq in parser.acquisitions] - - def remove_criteria(descriptions): - trim_conf = [] - for descr in bids_conf['descriptions']: - new_descr = descr.copy() - if len(descr['criteria']) > 1: - new_descr['criteria'] = OrderedDict() - new_descr['criteria']['SeriesDescription'] = descr[ - 'criteria']['SeriesDescription'] - trim_conf.append(new_descr) - return trim_conf - - participant = dcm2bids.Participant( - self.bids_sub, session=self.bids_ses - ) - - bids_conf = dcm2bids.load_json(self.dcm2bids_config) - - local_sidecars = [] - for search_path in [self.output_dir, self.bids_tmp]: - for item in self.find_outputs(".json", start_dir=search_path): - sidecar = dcm2bids.Sidecar(item) - if ('Repeat' in sidecar.data and - sidecar.data['Repeat'] != self.repeat): - continue - local_sidecars.append(sidecar) - local_sidecars = sorted(local_sidecars) - - xnat_sidecars = [] - for scan in self.experiment.scans: - xnat_sidecars.append(FakeSidecar(scan)) - xnat_sidecars = sorted(xnat_sidecars) - - local_scans = get_expected_names( - participant, local_sidecars, bids_conf - ) - - # Use a more permissive bids_conf 
when finding xnat acqs - xnat_parser = dcm2bids.SidecarPairing( - xnat_sidecars, remove_criteria(bids_conf['descriptions']) - ) - xnat_parser.build_graph() - xnat_parser.build_acquisitions(participant) - # Use this to find scans that have extra 'criteria' for single match - extra_acqs = [] - for sidecar, descriptions in xnat_parser.graph.items(): - if len(descriptions) > 1: - for descr in descriptions: - acq = Acquisition(participant, srcSidecar=sidecar, **descr) - extra_acqs.append(acq) - xnat_parser.acquisitions.extend(extra_acqs) - xnat_parser.find_runs() - xnat_scans = [acq.dstRoot for acq in xnat_parser.acquisitions] - - missing_scans = [] - for scan in xnat_scans: - if scan not in local_scans: - if "run-01" in scan: - norun_scan = scan.replace("_run-01", "") - if norun_scan not in local_scans: - missing_scans.append(scan) - else: - missing_scans.append(scan) - - extra_scans = [] - for scan in local_scans: - if scan not in xnat_scans: - if "run-01" in scan: - norun_scan = scan.replace("_run-01", "") - if norun_scan not in xnat_scans: - extra_scans.append(scan) - else: - extra_scans.append(scan) - - return missing_scans, extra_scans - class NiiLinkExporter(SessionExporter): """Populates a study's nii folder with symlinks pointing to the bids dir. diff --git a/datman/exporters/dashboard.py b/datman/exporters/dashboard.py index 86cdd290..5b5187ab 100644 --- a/datman/exporters/dashboard.py +++ b/datman/exporters/dashboard.py @@ -1,10 +1,9 @@ """An exporter to push raw datman files into the QC dashboard. 
""" -from datetime import datetime import logging import os +from datetime import datetime -from .base import SessionExporter import datman.config import datman.dashboard from datman.exceptions import (ConfigException, DashboardException, @@ -12,6 +11,7 @@ from datman.scanid import (KCNIIdentifier, parse, parse_bids_filename, ParseException) from datman.utils import find_tech_notes, get_extension +from .base import SessionExporter logger = logging.getLogger(__name__) diff --git a/datman/exporters/legacy.py b/datman/exporters/legacy.py index 977e64f0..df2a7fd0 100644 --- a/datman/exporters/legacy.py +++ b/datman/exporters/legacy.py @@ -12,8 +12,8 @@ import pydicom as dicom -from .base import SeriesExporter from datman.utils import run, make_temp_directory, get_extension +from .base import SeriesExporter logger = logging.getLogger(__name__) From a3bd70225867c6844d9e61528aebe87774a90ce5 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Mon, 18 Aug 2025 20:42:56 -0400 Subject: [PATCH 45/45] [ENH] Refactor away the old NiiLinkExporter --- datman/exporters/base.py | 32 ++- datman/exporters/bids.py | 490 +------------------------------- datman/exporters/bids_legacy.py | 367 +----------------------- datman/exporters/nii_symlink.py | 468 ++++++++++++++++++++++++++++++ 4 files changed, 503 insertions(+), 854 deletions(-) create mode 100644 datman/exporters/nii_symlink.py diff --git a/datman/exporters/base.py b/datman/exporters/base.py index af8c4208..67544c26 100644 --- a/datman/exporters/base.py +++ b/datman/exporters/base.py @@ -5,14 +5,15 @@ class that inherits from either SessionExporter if it must work on an entire scan session at once, or a SeriesExporter if it works on a single individual scan series at a time. 
""" - +import json import logging import os from abc import ABC, abstractmethod +from pathlib import Path logger = logging.getLogger(__name__) -__all__ = ["SeriesExporter", "SessionExporter"] +__all__ = ["SeriesExporter", "SessionExporter", "read_sidecar"] class Exporter(ABC): @@ -120,3 +121,30 @@ def __repr__(self): fq_name = str(self.__class__).replace("", "") name = fq_name.rsplit(".", maxsplit=1)[-1] return f"<{name} - {self.fname_root}>" + + +def read_sidecar(sidecar: str | Path) -> dict: + """Read the contents of a JSON sidecar file. + + NOTE: This adds the path of the file itself under the key 'Path' + """ + if not isinstance(sidecar, Path): + sidecar = Path(sidecar) + + try: + contents = sidecar.read_text(encoding="utf-8") + except (UnicodeDecodeError, OSError) as e: + logger.debug( + f"Sidecar file is unreadable {sidecar} - {e}" + ) + return {} + + try: + data = json.loads(contents) + except (json.JSONDecodeError, TypeError) as e: + logger.debug(f"Invalid json sidecar {sidecar} - {e}") + return {} + + data["Path"] = sidecar + + return data diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py index 0f0a7879..d9af464f 100644 --- a/datman/exporters/bids.py +++ b/datman/exporters/bids.py @@ -3,21 +3,17 @@ import os import logging import json -import re import dataclasses -from glob import glob from pathlib import Path import datman.config from datman.exceptions import MetadataException -from datman.utils import (locate_metadata, read_blacklist, get_relative_source, - get_extension, write_json, run) -from datman.scanid import make_filename -from .base import SessionExporter +from datman.utils import locate_metadata, write_json, run +from .base import SessionExporter, read_sidecar logger = logging.getLogger(__name__) -__all__ = ["BidsExporter", "NiiLinkExporter", "BidsOptions"] +__all__ = ["BidsExporter", "BidsOptions"] @dataclasses.dataclass @@ -249,483 +245,3 @@ def add_repeat_num(self): # Remove "Path" so it doesnt get written to the output file 
del contents["Path"] write_json(sidecar, contents) - - -class NiiLinkExporter(SessionExporter): - """Populates a study's nii folder with symlinks pointing to the bids dir. - """ - - type = "nii_link" - ext = ".nii.gz" - - def __init__(self, config, session, importer, **kwargs): - self.ident = session._ident - self.output_dir = session.nii_path - self.bids_path = session.bids_path - self.repeat = session.session - self.config = config - self.tags = config.get_tags(site=session.site) - - super().__init__(config, session, importer, **kwargs) - - @classmethod - def get_output_dir(cls, session): - return session.nii_path - - def needs_raw_data(self): - return False - - def outputs_exist(self): - sidecars = self.get_bids_sidecars() - name_map = self.make_dm_names(sidecars) - - for dm_name in name_map: - - if read_blacklist(scan=dm_name, config=self.config): - continue - - full_path = os.path.join(self.output_dir, dm_name + self.ext) - if not os.path.exists(full_path): - return False - - return True - - def export(self, *args, **kwargs): - sidecars = self.get_bids_sidecars() - name_map = self.make_dm_names(sidecars) - - if self.dry_run: - logger.info("Dry run: Skipping making nii folder links for " - f"mapping {name_map}") - return - - if self.outputs_exist(): - return - - self.make_output_dir() - - for dm_name, bids_name in name_map.items(): - self.link_scan(dm_name, bids_name) - - def link_scan(self, dm_name: str, bids_root: Path | str): - """Create a symlink in the datman style that points to a bids file. - - Args: - dm_name (:obj:`str`): A valid datman file name. - bids_root (:obj:`pathlib.Path`): The full path to a bids file - (without an extension). 
- """ - - if read_blacklist(scan=dm_name, config=self.config): - logger.debug(f"Ignoring blacklisted scan {dm_name}") - return - - base_target = os.path.join(self.output_dir, dm_name) - for source in glob(str(bids_root) + "*"): - ext = get_extension(source) - target = base_target + ext - - if is_broken_link(target): - remove_broken_link(target) - - rel_source = get_relative_source(source, target) - make_link(rel_source, target) - - def get_bids_sidecars(self) -> dict[int, list]: - """Get all sidecars from the session's BIDS folder. - - Returns: - :obj:`dict`: A map from the series number to a list of the JSON - sidecar contents that result from that series. - """ - sidecars = {} - bids_folder = Path(self.bids_path) - for sidecar in bids_folder.rglob("*.json"): - - contents = read_sidecar(sidecar) - if not contents: - continue - - if not self.matches_repeat(contents): - continue - - if "SeriesNumber" not in contents: - logger.debug( - "Ignoring malformed sidecar file (missing SeriesNumber): " - f"{sidecar}" - ) - continue - - try: - series_num = int(contents["SeriesNumber"]) - except ValueError: - logger.debug( - f"Ignoring non-numeric series number in {sidecar}" - ) - continue - - sidecars.setdefault(series_num, []).append(contents) - - self.fix_split_series_nums(sidecars) - - return sidecars - - def matches_repeat(self, sidecar: dict) -> bool: - """Check if a sidecar matches the current session's 'repeat'. - - The 'repeat' number is used to track when a scan session was stopped - and restarted during a visit. Most of the time it will be '01'. - """ - if "Repeat" not in sidecar: - # If this session is the first 'repeat' it's safe to assume an - # untagged sidecar belongs to it, since usually there's only one - # 'repeat' anyway. - return self.repeat == "01" - return sidecar["Repeat"] == self.repeat - - def fix_split_series_nums( - self, - sidecars: dict[int, list] - ) -> dict[int, list]: - """Attempt to correct series nums that have been prefixed with '10'. 
- - Some older versions of dcm2niix/dcm2bids liked to prefix half of a - split series' number with '10' rather than allowing all sidecars - to share the original series num. This attempts to identify when - that has happened and find the original series number for these - files. - """ - all_series = [str(series).zfill(2) for series in sidecars] - must_delete = [] - - for series in sidecars: - str_series = str(series) - - if not str_series.startswith("10"): - continue - - if len(str_series) < 4: - continue - - trimmed_series = str_series[2:] - if trimmed_series not in all_series: - # False alarm, probably not a mutated series number - continue - - sidecars[int(trimmed_series)].extend(sidecars[series]) - must_delete.append(series) - - for series in must_delete: - del sidecars[series] - - return sidecars - - def make_dm_names(self, sidecars: dict[int, list]) -> dict[str, Path]: - """Create a datman-style name for each identifiable sidecar. - - Args: - sidecars (`dict`): A dictionary mapping series numbers to a list - of bids sidecar files generated by that series. - - Returns: - dict: a dictionary mapping a datman-style filename to the bids - sidecar path (minus extension) it belongs to. - """ - found_names = {} - reqs = self.get_tag_requirements() - for series in sidecars: - - temp_names = {} - for item in sidecars[series]: - - found = self.find_tag(item, reqs) - - if not found: - logger.debug(f"No tag matches {item['Path']}, ignoring.") - continue - - if len(found) > 1: - logger.debug( - f"Multiple tags ({found}) match sidecar " - f"{item['Path']}. Ignoring it. Please update " - "configuration so at most one tag matches." 
- ) - continue - - dm_name = make_filename( - self.ident, - found[0], - series, - item["SeriesDescription"] - ) - - temp_names.setdefault(dm_name, []).append(item) - - found_names = self.handle_duplicate_names(found_names, temp_names) - - return found_names - - def get_tag_requirements(self) -> dict[str, dict]: - """Read and reformat user configuration for all tags. - - As described in datman's configuration documentation, at a minimum each - tag must define a 'SeriesDescription' regular expression. Tags - may optionally include a 'Bids' section, alongside datman's - 'Pattern' and 'Count' fields for a tag to make it more restrictive or - accurate. - - If included, the 'Bids' section should contain a list of sidecar field - names to check when determining if a tag can by applied. These must - match the sidecars fields verbatim (case-sensitive). Each field name - may then point to either: - - - a literal string to be matched - - a dictionary of settings - - The dictionary of settings may include the following keys: - - - **Pattern** (`str` or list, optional): May be a literal string or a - regular expression in Python format (e.g., use `.*` not `*`), or a - list of literal strings. Optional if `Exclude` is given. If omitted - and `Exclude` is used, the presence of the field name alone - excludes a sidecar from taking the tag. - - **Regex** (`bool`, optional): Indicates whether `Pattern` is a regex - or a string literal. Default is `False`. - - **Exclude** (`bool`, optional): Indicates whether to exclude sidecars - that match the pattern (i.e., take the inverse). Default is `False`. - - Examples: - Below are some YAML examples of commonly used configuration. 
- - Prevent any sidecar with an 'IntendedFor' field from matching - a tag: - - Bids: - IntendedFor: - Exclude: True - - Match a sidecar only if the PhaseEncodingDirection is exactly 'j': - - Bids: - PhaseEncodingDirection: 'j' - - Match a sidecar only if the ImageType contains 'DERIVED': - - Bids: - ImageType: - Pattern: 'DERIVED' - Regex: True - - Returns: - A dictionary mapping each tag name to the requirements that - must be met for a tag to be applied to a BIDs sidecar. - """ - reqs = {} - for tag in self.tags: - - conf = self.tags.get(tag) - - if is_malformed_conf(conf): - logger.error( - f"Ignoring tag {tag} - Incorrectly configured. Each tag " - "must contain a 'Pattern' section and each 'Pattern', at " - "a minimum, must contain a 'SeriesDescription'. Consult " - "the docs for more info.") - continue - - regex = conf["Pattern"]["SeriesDescription"] - if isinstance(regex, list): - regex = "|".join(regex) - - tag_reqs = { - "SeriesDescription": { - "Pattern": regex, - "Regex": True, - "Exclude": False - } - } - - bids_conf = conf.get("Bids", {}) - for field in bids_conf: - # Ensure consistent formatting for settings - if isinstance(bids_conf[field], (str, int)): - pattern = str(bids_conf[field]) - regex = False - exclude = False - else: - pattern = bids_conf[field].get("Pattern", "") - if not isinstance(pattern, str): - pattern = str(pattern) - regex = bids_conf[field].get("Regex", False) - exclude = bids_conf[field].get("Exclude", False) - - tag_reqs[field] = { - "Pattern": pattern, - "Regex": regex, - "Exclude": exclude - } - - reqs[tag] = tag_reqs - return reqs - - def find_tag(self, - sidecar: dict, - requirements: dict | None = None) -> list: - """Find which configured tags, if any, can be applied to a sidecar. - - Args: - sidecar (`dict`): The contents of a json sidecar. - requirements (`dict`, optional): The requirements to match - each accepted tag. Default is 'None', in which case the - default datman configuration will be consulted. 
- - Returns: - A list of tag names that the sidecar matches. - """ - if not requirements: - requirements = self.get_tag_requirements() - - found = [] - for tag in requirements: - - match = True - for field in requirements[tag]: - pattern = requirements[tag][field].get("Pattern", "") - is_regex = requirements[tag][field].get("Regex", False) - exclude = requirements[tag][field].get("Exclude", False) - - if field not in sidecar: - if not exclude: - # Absence of an expected field fails tag match - match = False - continue - - if exclude and not pattern: - # Excluded field is in sidecar, so doesnt match tag - match = False - continue - - actual = sidecar[field] - if not isinstance(actual, str): - actual = str(actual) - - if is_regex: - comparator = re.search - else: - comparator = re.fullmatch - - if not comparator(pattern, actual, re.IGNORECASE): - match = False - elif exclude: - # Tag does match, but settings indicate to take inverse - match = False - if match: - found.append(tag) - - return found - - def handle_duplicate_names( - self, - existing_names: dict[str, str], - new_entries: dict[str, dict] - ) -> dict[str, str]: - """Make duplicated names unique. - - Sometimes, as with multi-echo scans, multiple BIDs files will create - the same datman name. This ensures a unique name exists for each. - - Args: - existing_names (`dict`): The dictionary to add the fixed name - entries to. - new_entries (`dict`): New entries that may contain duplicated - datman-style names. - - Returns: - dict[str, str]: The existing_names dictionary with all - new entries merged in with unique names. - """ - for name in new_entries: - - if len(new_entries[name]) == 1: - existing_names[name] = remove_extension( - new_entries[name][0]["Path"] - ) - continue - - for sidecar in new_entries[name]: - if "EchoNumber" not in sidecar: - logger.error( - "Multiple BIDs files result in same file name " - f"'{name}'. 
Please update configuration to help " - f"identify file: {sidecar['Path']}" - ) - continue - new_name = name + f"_ECHO-{sidecar['EchoNumber']}" - existing_names[new_name] = remove_extension(sidecar['Path']) - - return existing_names - - -def is_malformed_conf(config: dict) -> bool: - """Check if a tag's configuration is unusably malformed. - """ - if "Pattern" not in config: - return True - if "SeriesDescription" not in config["Pattern"]: - return True - return False - - -def remove_extension(path: Path) -> Path: - """Remove all extensions from a path. - """ - while path.suffix: - path = path.with_suffix("") - return path - - -def is_broken_link(symlink: str) -> bool: - return os.path.islink(symlink) and not os.path.exists(symlink) - - -def remove_broken_link(target: str): - try: - os.unlink(target) - except OSError as e: - logger.error(f"Failed to remove broken symlink {target} - {e}") - - -def make_link(source: str, target: str): - try: - os.symlink(source, target) - except FileExistsError: - pass - except OSError as e: - logger.error(f"Failed to create {target} - {e}") - - -def read_sidecar(sidecar: str | Path) -> dict: - """Read the contents of a JSON sidecar file. 
- - NOTE: This adds the path of the file itself under the key 'Path' - """ - if not isinstance(sidecar, Path): - sidecar = Path(sidecar) - - try: - contents = sidecar.read_text(encoding="utf-8") - except (UnicodeDecodeError, OSError) as e: - logger.debug( - f"Sidecar file is unreadable {sidecar} - {e}" - ) - return {} - - try: - data = json.loads(contents) - except (json.JSONDecodeError, TypeError) as e: - logger.debug(f"Invalid json sidecar {sidecar} - {e}") - return {} - - data["Path"] = sidecar - - return data diff --git a/datman/exporters/bids_legacy.py b/datman/exporters/bids_legacy.py index 283a24ae..06026987 100644 --- a/datman/exporters/bids_legacy.py +++ b/datman/exporters/bids_legacy.py @@ -21,15 +21,12 @@ from dcm2bids.sidecar import Acquisition from datman.exceptions import MetadataException -from datman.scanid import make_filename -from datman.utils import (splitext, get_extension, write_json, read_json, - filter_niftis, read_blacklist, get_relative_source, - locate_metadata) +from datman.utils import (splitext, write_json, read_json, locate_metadata) from .base import SessionExporter logger = logging.getLogger(__name__) -__all__ = ["BidsExporter", "NiiLinkExporter", "BidsOptions"] +__all__ = ["BidsExporter", "BidsOptions"] class BidsOptions: @@ -594,356 +591,6 @@ def get_sidecars(self): return contents -class NiiLinkExporter(SessionExporter): - """Populates a study's nii folder with symlinks pointing to the bids dir. 
- """ - - type = "nii_link" - ext = ".nii.gz" - - def __init__(self, config, session, experiment, **kwargs): - self.ident = session._ident - self.output_dir = session.nii_path - self.bids_path = session.bids_path - self.config = config - self.tags = config.get_tags(site=session.site) - - super().__init__(config, session, experiment, **kwargs) - - self.dm_names = self.get_dm_names() - self.bids_names = self.get_bids_niftis() - self.name_map = self.match_dm_to_bids(self.dm_names, self.bids_names) - - def get_dm_names(self): - """Get the datman-style scan names for an entire XNAT experiment. - - Returns: - :obj:`list`: A list of datman-style names for all scans found - for the session on XNAT. - """ - names = [] - for scan in self.experiment.scans: - names.extend(scan.names) - return names - - def get_bids_niftis(self): - """Get all nifti files from a BIDS session. - - Returns: - :obj:`list`: A list of full paths (minus the file extension) to - each bids format nifti file in the session. - """ - bids_niftis = [] - for path, _, files in os.walk(self.bids_path): - niftis = filter_niftis(files) - for item in niftis: - basename = item.replace(get_extension(item), "") - nii_path = os.path.join(path, basename) - if self.belongs_to_session(nii_path): - bids_niftis.append(nii_path) - return bids_niftis - - def belongs_to_session(self, nifti_path): - """Check if a nifti belongs to this repeat or another for this session. - - Args: - nifti_path (str): A nifti file name from the bids folder (minus - extension). - - Returns: - bool: True if the nifti file belongs to this particular - repeat. False if it belongs to another repeat. - """ - try: - side_car = read_json(nifti_path + ".json") - except FileNotFoundError: - # Assume it belongs if a side car cant be read. - return True - - repeat = side_car.get("Repeat") - if not repeat: - # No repeat is recorded in the json, assume its for this session. 
- return True - - return repeat == self.ident.session - - def match_dm_to_bids(self, dm_names, bids_names): - """Match each datman file name to its BIDS equivalent. - - Args: - dm_names (:obj:`list`): A list of all valid datman scan names found - for this session on XNAT. - bids_names (:obj:`list`): A list of all bids files (minus - extensions) that exist for this session. - - Returns: - :obj:`dict`: A dictionary matching the intended datman file name to - the full path (minus extension) of the same series in the bids - folder. If no matching bids file was found, it will instead be - matched to the string 'missing'. - """ - name_map = {} - for tag in self.tags: - try: - bids_conf = self.tags.get(tag)['Bids'] - except KeyError: - logger.info(f"No bids config found for tag {tag}. Can't match " - "bids outputs to a datman-style name.") - continue - - matches = self._find_matching_files(bids_names, bids_conf) - - for item in matches: - try: - dm_name = self.make_datman_name(item, tag) - except Exception as e: - logger.error( - f"Failed to assign datman style name to {item}. " - f"Reason - {e}") - continue - name_map[dm_name] = item - - for scan in dm_names: - output_file = os.path.join(self.output_dir, scan + self.ext) - if scan not in name_map and not os.path.exists(output_file): - # An expected scan is missing from the bids folder and - # hasnt already been exported directly with dcm2niix - name_map[scan] = "missing" - - return name_map - - def make_datman_name(self, bids_path, scan_tag): - """Create a Datman-style file name for a bids file. - - Args: - bids_path (str): The full path (+/- extension) of a bids file to - create a datman name for. - scan_tag (str): A datman style tag to apply to the bids scan. - - Returns: - str: A valid datman style file name (minus extension). 
- """ - side_car = read_json(bids_path + ".json") - description = side_car['SeriesDescription'] - num = self.get_series_num(side_car) - - dm_name = make_filename(self.ident, scan_tag, num, description) - return dm_name - - def get_series_num(self, side_car): - """Find the correct series number for a scan. - - Most JSON side car files have the correct series number already. - However, series that are split during nifti conversion (e.g. - FMAP-AP/-PA) end up with one of the two JSON files having a modified - series number. This function will default to the XNAT series number - whenever possible, for accuracy. - - Args: - side_car (:obj:`dict`): A dictionary containing the contents of a - scan's JSON side car file. - - Returns: - str: The most accurate series number found for the scan. - """ - description = side_car['SeriesDescription'] - num = str(side_car['SeriesNumber']) - xnat_scans = [item for item in self.experiment.scans - if item.description == description] - - if not xnat_scans: - return num - - if len(xnat_scans) == 1: - return xnat_scans[0].series - - # Catch split series (dcm2bids adds 1000 to the series number of - # one of the two files) - split_num = str(int(num) - 1000).zfill(2) - if any([split_num == str(item.series).zfill(2) - for item in xnat_scans]): - return split_num - - return num - - def _find_matching_files(self, bids_names, bids_conf): - """Search a list of bids files to find series that match a datman tag. - - Args: - bids_names (:obj:`list`): A list of bids file names to search - through. - bids_conf (:obj:`dict`): The bids configuration for a single tag - from datman's configuration files. - - Returns: - :obj:`list`: A list of full paths (minus extension) of bids files - that match the tag configuration. If none match, an empty - list will be returned. 
- """ - matches = self._filter_bids( - bids_names, bids_conf.get('class'), par_dir=True) - matches = self._filter_bids( - matches, bids_conf.get(self._get_label_key(bids_conf))) - matches = self._filter_bids(matches, bids_conf.get('task')) - matches = self._filter_bids(matches, bids_conf.get('dir')) - # The below is used to more accurately match FMAP tags - matches = self._filter_bids(matches, bids_conf.get('match_acq')) - return matches - - def _filter_bids(self, niftis, search_term, par_dir=False): - """Find the subset of file names that matches a search string. - - Args: - niftis (:obj:`list`): A list of nifti file names to search through. - search_term (:obj:`str`): The search term nifti files must match. - par_dir (bool, optional): Restricts the search to the nifti file's - parent directory, if full paths were given. - - Returns: - list: A list of all files that match the search term. - """ - if not search_term: - return niftis.copy() - - if not isinstance(search_term, list): - search_term = [search_term] - - result = set() - for item in niftis: - if par_dir: - fname = os.path.split(os.path.dirname(item))[1] - else: - fname = os.path.basename(item) - - for term in search_term: - if term in fname: - result.add(item) - return list(result) - - def _get_label_key(self, bids_conf): - """Return the name for the configuration's label field. 
- """ - for key in bids_conf: - if 'label' in key: - return key - return "" - - @classmethod - def get_output_dir(cls, session): - return session.nii_path - - def get_error_file(self, dm_file): - return os.path.join(self.output_dir, dm_file + ".err") - - def outputs_exist(self): - for dm_name in self.name_map: - if read_blacklist(scan=dm_name, config=self.config): - continue - - if self.name_map[dm_name] == "missing": - if not os.path.exists(self.get_error_file(dm_name)): - return False - continue - - full_path = os.path.join(self.output_dir, dm_name + self.ext) - if not os.path.exists(full_path): - return False - return True - - def needs_raw_data(self): - return False - - def export(self, *args, **kwargs): - # Re run this before exporting, in case new BIDS files exist. - self.bids_names = self.get_bids_niftis() - self.name_map = self.match_dm_to_bids(self.dm_names, self.bids_names) - - if self.dry_run: - logger.info("Dry run: Skipping making nii folder links for " - f"mapping {self.name_map}") - return - - if self.outputs_exist(): - return - - self.make_output_dir() - for dm_name, bids_name in self.name_map.items(): - if bids_name == "missing": - self.report_errors(dm_name) - else: - self.make_link(dm_name, bids_name) - # Run in case of previous errors - self.clear_errors(dm_name) - - def report_errors(self, dm_file): - """Create an error file to report probable BIDS conversion issues. - - Args: - dm_file (:obj:`str`): A valid datman file name. - """ - err_file = self.get_error_file(dm_file) - contents = ( - f"{dm_file} could not be made. This may be due to a dcm2bids " - "conversion error or an issue with downloading the raw dicoms. " - "Please contact an admin as soon as possible.\n" - ) - try: - with open(err_file, "w") as fh: - fh.write(contents) - except Exception as e: - logger.error( - f"Failed to write error file for {dm_file}. Reason - {e}" - ) - - def clear_errors(self, dm_file): - """Remove an error file from a previous BIDs export issue. 
- - Args: - dm_file (:obj:`str`): A valid datman file name. - """ - err_file = self.get_error_file(dm_file) - try: - os.remove(err_file) - except FileNotFoundError: - pass - except Exception as e: - logger.error(f"Failed while removing {err_file}. Reason - {e}") - - def make_link(self, dm_file, bids_file): - """Create a symlink in the datman style that points to a bids file. - - Args: - dm_file (:obj:`str`): A valid datman file name. - bids_file (:obj:`str`): The full path to a bids file (minus - extension.) - """ - base_target = os.path.join(self.output_dir, dm_file) - if read_blacklist(scan=base_target, config=self.config): - logger.debug(f"Ignoring blacklisted scan {dm_file}") - return - - for source in glob(bids_file + '*'): - ext = get_extension(source) - target = base_target + ext - - if os.path.islink(target) and not os.path.exists(target): - # Remove a broken symlink - try: - os.unlink(target) - except Exception as exc: - logger.error( - f"Failed to remove broken symlink {target} - {exc}") - continue - - rel_source = get_relative_source(source, target) - try: - os.symlink(rel_source, target) - except FileExistsError: - pass - except Exception as exc: - logger.error(f"Failed to create {target}. Reason - {exc}") - - class FakeSidecar(dcm2bids.Sidecar): """Turns XNAT series descriptions into pseudo-sidecars. 
""" @@ -975,16 +622,6 @@ def __repr__(self): return f"" -def get_expected_names(participant, sidecars, bids_conf): - parser = dcm2bids.SidecarPairing( - sidecars, bids_conf["descriptions"] - ) - parser.build_graph() - parser.build_acquisitions(participant) - parser.find_runs() - return [acq.dstRoot for acq in parser.acquisitions] - - def remove_criteria(descriptions): trim_conf = [] for descr in descriptions: diff --git a/datman/exporters/nii_symlink.py b/datman/exporters/nii_symlink.py new file mode 100644 index 00000000..50e62a28 --- /dev/null +++ b/datman/exporters/nii_symlink.py @@ -0,0 +1,468 @@ +"""Populate the 'nii' folder with symlinks to the bids folder. +""" +import logging +import os +import re +from glob import glob +from pathlib import Path + +from datman.scanid import make_filename +from datman.utils import (read_blacklist, get_relative_source, get_extension) +from .base import SessionExporter, read_sidecar + +logger = logging.getLogger(__name__) + +__all__ = ["NiiLinkExporter"] + + +class NiiLinkExporter(SessionExporter): + """Populates a study's nii folder with symlinks pointing to the bids dir. 
+ """ + + type = "nii_link" + ext = ".nii.gz" + + def __init__(self, config, session, importer, **kwargs): + self.ident = session._ident + self.output_dir = session.nii_path + self.bids_path = session.bids_path + self.repeat = session.session + self.config = config + self.tags = config.get_tags(site=session.site) + + super().__init__(config, session, importer, **kwargs) + + @classmethod + def get_output_dir(cls, session): + return session.nii_path + + def needs_raw_data(self): + return False + + def outputs_exist(self): + sidecars = self.get_bids_sidecars() + name_map = self.make_dm_names(sidecars) + + for dm_name in name_map: + + if read_blacklist(scan=dm_name, config=self.config): + continue + + full_path = os.path.join(self.output_dir, dm_name + self.ext) + if not os.path.exists(full_path): + return False + + return True + + def export(self, *args, **kwargs): + sidecars = self.get_bids_sidecars() + name_map = self.make_dm_names(sidecars) + + if self.dry_run: + logger.info("Dry run: Skipping making nii folder links for " + f"mapping {name_map}") + return + + if self.outputs_exist(): + return + + self.make_output_dir() + + for dm_name, bids_name in name_map.items(): + self.link_scan(dm_name, bids_name) + + def link_scan(self, dm_name: str, bids_root: Path | str): + """Create a symlink in the datman style that points to a bids file. + + Args: + dm_name (:obj:`str`): A valid datman file name. + bids_root (:obj:`pathlib.Path`): The full path to a bids file + (without an extension). 
+ """ + + if read_blacklist(scan=dm_name, config=self.config): + logger.debug(f"Ignoring blacklisted scan {dm_name}") + return + + base_target = os.path.join(self.output_dir, dm_name) + for source in glob(str(bids_root) + "*"): + ext = get_extension(source) + target = base_target + ext + + if is_broken_link(target): + remove_broken_link(target) + + rel_source = get_relative_source(source, target) + make_link(rel_source, target) + + def get_bids_sidecars(self) -> dict[int, list]: + """Get all sidecars from the session's BIDS folder. + + Returns: + :obj:`dict`: A map from the series number to a list of the JSON + sidecar contents that result from that series. + """ + sidecars = {} + bids_folder = Path(self.bids_path) + for sidecar in bids_folder.rglob("*.json"): + + contents = read_sidecar(sidecar) + if not contents: + continue + + if not self.matches_repeat(contents): + continue + + if "SeriesNumber" not in contents: + logger.debug( + "Ignoring malformed sidecar file (missing SeriesNumber): " + f"{sidecar}" + ) + continue + + try: + series_num = int(contents["SeriesNumber"]) + except ValueError: + logger.debug( + f"Ignoring non-numeric series number in {sidecar}" + ) + continue + + sidecars.setdefault(series_num, []).append(contents) + + self.fix_split_series_nums(sidecars) + + return sidecars + + def matches_repeat(self, sidecar: dict) -> bool: + """Check if a sidecar matches the current session's 'repeat'. + + The 'repeat' number is used to track when a scan session was stopped + and restarted during a visit. Most of the time it will be '01'. + """ + if "Repeat" not in sidecar: + # If this session is the first 'repeat' it's safe to assume an + # untagged sidecar belongs to it, since usually there's only one + # 'repeat' anyway. + return self.repeat == "01" + return sidecar["Repeat"] == self.repeat + + def fix_split_series_nums( + self, + sidecars: dict[int, list] + ) -> dict[int, list]: + """Attempt to correct series nums that have been prefixed with '10'. 
+ + Some older versions of dcm2niix/dcm2bids liked to prefix half of a + split series' number with '10' rather than allowing all sidecars + to share the original series num. This attempts to identify when + that has happened and find the original series number for these + files. + """ + all_series = [str(series).zfill(2) for series in sidecars] + must_delete = [] + + for series in sidecars: + str_series = str(series) + + if not str_series.startswith("10"): + continue + + if len(str_series) < 4: + continue + + trimmed_series = str_series[2:] + if trimmed_series not in all_series: + # False alarm, probably not a mutated series number + continue + + sidecars[int(trimmed_series)].extend(sidecars[series]) + must_delete.append(series) + + for series in must_delete: + del sidecars[series] + + return sidecars + + def make_dm_names(self, sidecars: dict[int, list]) -> dict[str, Path]: + """Create a datman-style name for each identifiable sidecar. + + Args: + sidecars (`dict`): A dictionary mapping series numbers to a list + of bids sidecar files generated by that series. + + Returns: + dict: a dictionary mapping a datman-style filename to the bids + sidecar path (minus extension) it belongs to. + """ + found_names = {} + reqs = self.get_tag_requirements() + for series in sidecars: + + temp_names = {} + for item in sidecars[series]: + + found = self.find_tag(item, reqs) + + if not found: + logger.debug(f"No tag matches {item['Path']}, ignoring.") + continue + + if len(found) > 1: + logger.debug( + f"Multiple tags ({found}) match sidecar " + f"{item['Path']}. Ignoring it. Please update " + "configuration so at most one tag matches." 
+ ) + continue + + dm_name = make_filename( + self.ident, + found[0], + series, + item["SeriesDescription"] + ) + + temp_names.setdefault(dm_name, []).append(item) + + found_names = self.handle_duplicate_names(found_names, temp_names) + + return found_names + + def get_tag_requirements(self) -> dict[str, dict]: + """Read and reformat user configuration for all tags. + + As described in datman's configuration documentation, at a minimum each + tag must define a 'SeriesDescription' regular expression. Tags + may optionally include a 'Bids' section, alongside datman's + 'Pattern' and 'Count' fields for a tag to make it more restrictive or + accurate. + + If included, the 'Bids' section should contain a list of sidecar field + names to check when determining if a tag can by applied. These must + match the sidecars fields verbatim (case-sensitive). Each field name + may then point to either: + + - a literal string to be matched + - a dictionary of settings + + The dictionary of settings may include the following keys: + + - **Pattern** (`str` or list, optional): May be a literal string or a + regular expression in Python format (e.g., use `.*` not `*`), or a + list of literal strings. Optional if `Exclude` is given. If omitted + and `Exclude` is used, the presence of the field name alone + excludes a sidecar from taking the tag. + - **Regex** (`bool`, optional): Indicates whether `Pattern` is a regex + or a string literal. Default is `False`. + - **Exclude** (`bool`, optional): Indicates whether to exclude sidecars + that match the pattern (i.e., take the inverse). Default is `False`. + + Examples: + Below are some YAML examples of commonly used configuration. 
+ + Prevent any sidecar with an 'IntendedFor' field from matching + a tag: + + Bids: + IntendedFor: + Exclude: True + + Match a sidecar only if the PhaseEncodingDirection is exactly 'j': + + Bids: + PhaseEncodingDirection: 'j' + + Match a sidecar only if the ImageType contains 'DERIVED': + + Bids: + ImageType: + Pattern: 'DERIVED' + Regex: True + + Returns: + A dictionary mapping each tag name to the requirements that + must be met for a tag to be applied to a BIDs sidecar. + """ + reqs = {} + for tag in self.tags: + + conf = self.tags.get(tag) + + if is_malformed_conf(conf): + logger.error( + f"Ignoring tag {tag} - Incorrectly configured. Each tag " + "must contain a 'Pattern' section and each 'Pattern', at " + "a minimum, must contain a 'SeriesDescription'. Consult " + "the docs for more info.") + continue + + regex = conf["Pattern"]["SeriesDescription"] + if isinstance(regex, list): + regex = "|".join(regex) + + tag_reqs = { + "SeriesDescription": { + "Pattern": regex, + "Regex": True, + "Exclude": False + } + } + + bids_conf = conf.get("Bids", {}) + for field in bids_conf: + # Ensure consistent formatting for settings + if isinstance(bids_conf[field], (str, int)): + pattern = str(bids_conf[field]) + regex = False + exclude = False + else: + pattern = bids_conf[field].get("Pattern", "") + if not isinstance(pattern, str): + pattern = str(pattern) + regex = bids_conf[field].get("Regex", False) + exclude = bids_conf[field].get("Exclude", False) + + tag_reqs[field] = { + "Pattern": pattern, + "Regex": regex, + "Exclude": exclude + } + + reqs[tag] = tag_reqs + return reqs + + def find_tag(self, + sidecar: dict, + requirements: dict | None = None) -> list: + """Find which configured tags, if any, can be applied to a sidecar. + + Args: + sidecar (`dict`): The contents of a json sidecar. + requirements (`dict`, optional): The requirements to match + each accepted tag. Default is 'None', in which case the + default datman configuration will be consulted. 
+ + Returns: + A list of tag names that the sidecar matches. + """ + if not requirements: + requirements = self.get_tag_requirements() + + found = [] + for tag in requirements: + + match = True + for field in requirements[tag]: + pattern = requirements[tag][field].get("Pattern", "") + is_regex = requirements[tag][field].get("Regex", False) + exclude = requirements[tag][field].get("Exclude", False) + + if field not in sidecar: + if not exclude: + # Absence of an expected field fails tag match + match = False + continue + + if exclude and not pattern: + # Excluded field is in sidecar, so doesnt match tag + match = False + continue + + actual = sidecar[field] + if not isinstance(actual, str): + actual = str(actual) + + if is_regex: + comparator = re.search + else: + comparator = re.fullmatch + + if not comparator(pattern, actual, re.IGNORECASE): + match = False + elif exclude: + # Tag does match, but settings indicate to take inverse + match = False + if match: + found.append(tag) + + return found + + def handle_duplicate_names( + self, + existing_names: dict[str, str], + new_entries: dict[str, dict] + ) -> dict[str, str]: + """Make duplicated names unique. + + Sometimes, as with multi-echo scans, multiple BIDs files will create + the same datman name. This ensures a unique name exists for each. + + Args: + existing_names (`dict`): The dictionary to add the fixed name + entries to. + new_entries (`dict`): New entries that may contain duplicated + datman-style names. + + Returns: + dict[str, str]: The existing_names dictionary with all + new entries merged in with unique names. + """ + for name in new_entries: + + if len(new_entries[name]) == 1: + existing_names[name] = remove_extension( + new_entries[name][0]["Path"] + ) + continue + + for sidecar in new_entries[name]: + if "EchoNumber" not in sidecar: + logger.error( + "Multiple BIDs files result in same file name " + f"'{name}'. 
Please update configuration to help " + f"identify file: {sidecar['Path']}" + ) + continue + new_name = name + f"_ECHO-{sidecar['EchoNumber']}" + existing_names[new_name] = remove_extension(sidecar['Path']) + + return existing_names + + +def is_malformed_conf(config: dict) -> bool: + """Check if a tag's configuration is unusably malformed. + """ + if "Pattern" not in config: + return True + if "SeriesDescription" not in config["Pattern"]: + return True + return False + + +def remove_extension(path: Path) -> Path: + """Remove all extensions from a path. + """ + while path.suffix: + path = path.with_suffix("") + return path + + +def is_broken_link(symlink: str) -> bool: + return os.path.islink(symlink) and not os.path.exists(symlink) + + +def remove_broken_link(target: str): + try: + os.unlink(target) + except OSError as e: + logger.error(f"Failed to remove broken symlink {target} - {e}") + + +def make_link(source: str, target: str): + try: + os.symlink(source, target) + except FileExistsError: + pass + except OSError as e: + logger.error(f"Failed to create {target} - {e}")