From 3fb30fbac22b6a05a6c9888bcbb626e54981cada Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Fri, 28 Feb 2025 14:41:24 -0500
Subject: [PATCH 01/45] [ENH] Start adding zip file importer (and move XNAT
 classes)

---
 datman/importers.py | 941 ++++++++++++++++++++++++++++++++++++++++++++
 datman/xnat.py      | 645 +-----------------------------
 2 files changed, 944 insertions(+), 642 deletions(-)
 create mode 100644 datman/importers.py

diff --git a/datman/importers.py b/datman/importers.py
new file mode 100644
index 00000000..f6d99628
--- /dev/null
+++ b/datman/importers.py
@@ -0,0 +1,941 @@
+"""Input formats that datman can use to read new data.
+
+This file contains classes for reading in data that is _new_ to datman. Datman
+uses these classes to create a uniform interface for its exporters, which
+create the files and database contents users may actually interact with.
+"""
+from abc import ABC, abstractmethod
+import glob
+import json
+import logging
+import os
+import re
+import shutil
+from zipfile import ZipFile
+
+from datman.exceptions import ParseException, XnatException, InputException
+from datman.utils import is_dicom, get_archive_headers
+
+
+logger = logging.getLogger(__name__)
+
+
+class SessionImporter(ABC):
+
+    # Exporters currently use these from XNATExperiment:
+    # experiment.name
+    # experiment.source_name (related to sharing data)
+    # experiment.scans
+    # experiment.date
+    # experiment.is_shared()
+
+    # Missed but possibly needed attributes (from extract):
+    #   experiment.assign_scan_names(config, ident)
+    #
+    # Maybe we really just need a resource exporter class...
+    #   experiment.resource_files (list of dicts)
+    #   experiment.resource_IDs (dict of folder names to numerical IDs)
+    #           e.g. {'behav': '297528', 'misc': '305312'}
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """A valid ID for the scan session being imported.
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def source_name(self) -> str:
+        """The original ID of a scan session shared from another project.
+
+        If the session currently being imported originates from another
+        project, 'name' is the session's ID in the new project and source_name
+        corresponds to its original ID. This will be equal to 'name' when
+        the session is not shared or sharing is not being tracked.
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def date(self) -> str:
+        """A string representation (YYYY-MM-DD) of the scan collection date.
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def scans(self) -> list['SeriesImporter']:
+        """A list of scan series that belong to the session.
+        """
+        pass
+
+    @abstractmethod
+    def is_shared(self) -> bool:
+        """Indicates whether the session is shared with other projects.
+        """
+        pass
+
+
+class SeriesImporter(ABC):
+    # XNATScan attributes and methods used by exporters...
+    # .series
+    # .subject (FakeSideCar needs)
+    # .names
+    # .description
+
+    # MISSED (may have missed more in dm_xnat_extract):
+    #   scan.download_dir
+    #       xnat copy for example points to: /scratch/dawn/temp_stuff/export_zip/xnat_copy/SPN10_CMH_0083_01_SE01_MR/scans/6-t1_mprage_T1_900/resources/DICOM/files
+    #       unzipped copy would be (diff session): 20190116_Ex09352_ASND1MR_ASQB002/Ex09352_Se00003_SagT1Bravo-1mm-32ch/
+
+    @property
+    @abstractmethod
+    def series(self) -> str:
+        """A string representation of the series 'number'
+
+        This should be a string because sometimes the 'number' comes with
+        non-numeric prefixes or postfixes (e.g. on XNAT in some circumstances).
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def subject(self) -> str:
+        """The subject ID of the session this scan belongs to.
+
+        The subject ID may vary from the SessionImporter.name (i.e. a
+        truncated or extended version of it as subject may be to experiment
+        on XNAT).
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def description(self) -> str:
+        """The series description (as from the dicom headers).
+        """
+
+    @property
+    @abstractmethod
+    def names(self) -> list[str]:
+        """A list of valid scan names that may be applied to this series.
+        """
+        pass
+
+
+###############################################################################
+#### XNAT classes, formerly in xnat.py
+
+
+class XNATObject(ABC):
+    def _get_field(self, key):
+        if not self.raw_json.get("data_fields"):
+            return ""
+        return self.raw_json["data_fields"].get(key, "")
+
+
+class XNATSubject(XNATObject):
+    def __init__(self, subject_json):
+        self.raw_json = subject_json
+        self.name = self._get_field("label")
+        self.project = self._get_field("project")
+        self.experiments = self._get_experiments()
+
+    def _get_experiments(self):
+        experiments = [
+            exp for exp in self.raw_json["children"]
+            if exp["field"] == "experiments/experiment"
+        ]
+
+        if not experiments:
+            logger.debug(f"No experiments found for {self.name}")
+            return {}
+
+        found = {}
+        for item in experiments[0]["items"]:
+            exper = XNATExperiment(self.project, self.name, item)
+            found[exper.name] = exper
+
+        return found
+
+    def __str__(self):
+        return f"<XNATSubject {self.name}>"
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class XNATExperiment(SessionImporter, XNATObject):
+    def __init__(self, project, subject_name, experiment_json):
+        self.raw_json = experiment_json
+        self.project = project
+        self.subject = subject_name
+        self.uid = self._get_field("UID")
+        self.id = self._get_field("ID")
+        self.date = self._get_field("date")
+
+        if self.is_shared():
+            self.name = [label for label in self.get_alt_labels()
+                         if self.subject in label][0]
+            self.source_name = self._get_field("label")
+        else:
+            self.name = self._get_field("label")
+            self.source_name = self.name
+
+        # Scan attributes
+        self.scans = self._get_scans()
+        self.scan_UIDs = self._get_scan_UIDs()
+        self.scan_resource_IDs = self._get_scan_rIDs()
+
+        # Resource attributes
+        self.resource_files = self._get_contents("resources/resource")
+        self.resource_IDs = self._get_resource_IDs()
+
+        # Misc - basically just OPT CU1 needs this
+        self.misc_resource_IDs = self._get_other_resource_IDs()
+
+    # Use properties here to conform with SessionImporter interface
+    # and guarantee at creation that expected attributes exist
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @name.setter
+    def name(self, value: str):
+        self._name = value
+
+    @property
+    def source_name(self) -> str:
+        return self._source_name
+
+    @source_name.setter
+    def source_name(self, value: str):
+        self._source_name = value
+
+    @property
+    def scans(self) -> list['XNATScan']:
+        return self._scans
+
+    @scans.setter
+    def scans(self, value: list['XNATScan']):
+        self._scans = value
+
+    @property
+    def date(self) -> str:
+        return self._date
+
+    @date.setter
+    def date(self, value: str):
+        self._date = value
+
+    def _get_contents(self, data_type):
+        children = self.raw_json.get("children", [])
+
+        contents = [
+            child["items"] for child in children if child["field"] == data_type
+        ]
+        return contents
+
+    def _get_scans(self):
+        scans = self._get_contents("scans/scan")
+        if not scans:
+            logger.debug(f"No scans found for experiment {self.name}")
+            return scans
+        xnat_scans = []
+        for scan_json in scans[0]:
+            xnat_scans.append(XNATScan(self, scan_json))
+        return xnat_scans
+
+    def _get_scan_UIDs(self):
+        return [scan.uid for scan in self.scans]
+
+    def _get_scan_rIDs(self):
+        # These can be used to download a series from xnat
+        resource_ids = []
+        for scan in self.scans:
+            for child in scan.raw_json["children"]:
+                if child["field"] != "file":
+                    continue
+                for item in child["items"]:
+                    try:
+                        label = item["data_fields"]["label"]
+                    except KeyError:
+                        continue
+                    if label != "DICOM":
+                        continue
+                    r_id = item["data_fields"]["xnat_abstractresource_id"]
+                    resource_ids.append(str(r_id))
+        return resource_ids
+
+    def _get_resource_IDs(self):
+        if not self.resource_files:
+            return {}
+
+        resource_ids = {}
+        for resource in self.resource_files[0]:
+            label = resource["data_fields"].get("label", "No Label")
+            resource_ids[label] = str(
+                resource["data_fields"]["xnat_abstractresource_id"])
+        return resource_ids
+
+    def _get_other_resource_IDs(self):
+        """
+        OPT's CU site uploads niftis to their server. These niftis are neither
+        classified as resources nor as scans so our code misses them entirely.
+        This function grabs the abstractresource_id for these and
+        any other unique files aside from snapshots so they can be downloaded
+        """
+        r_ids = []
+        for scan in self.scans:
+            for child in scan.raw_json["children"]:
+                for file_upload in child["items"]:
+                    data_fields = file_upload["data_fields"]
+                    try:
+                        label = data_fields["label"]
+                    except KeyError:
+                        # Some entries don't have labels. Only hold some header
+                        # values. These are safe to ignore
+                        continue
+
+                    try:
+                        data_format = data_fields["format"]
+                    except KeyError:
+                        # Some entries have labels but no format... or neither
+                        if not label:
+                            # If neither, ignore. Should just be an entry
+                            # containing scan parameters, etc.
+                            continue
+                        data_format = label
+
+                    try:
+                        r_id = str(data_fields["xnat_abstractresource_id"])
+                    except KeyError:
+                        # Some entries have labels and/or a format but no
+                        # actual files and so no resource id. These can also be
+                        # safely ignored.
+                        continue
+
+                    # ignore DICOM, it's grabbed elsewhere. Ignore snapshots
+                    # entirely. Some things may not be labelled DICOM but may
+                    # be format 'DICOM' so that needs to be checked for too.
+                    if label != "DICOM" and (data_format != "DICOM"
+                                             and label != "SNAPSHOTS"):
+                        r_ids.append(r_id)
+        return r_ids
+
+    def get_autorun_ids(self, xnat):
+        """Find the ID(s) of the 'autorun.xml' workflow
+
+        XNAT has this obnoxious, on-by-default and seemingly impossible to
+        disable, 'workflow' called AutoRun.xml. It appears to do nothing other
+        than prevent certain actions (like renaming subjects/experiments) if
+        it is stuck in the running or queued state. This will grab the autorun
+        ID for this experiment so that it can be modified.
+
+        Sometimes more than one pipeline gets launched for a subject even
+        though the GUI only reports one. This will grab the ID for all of them.
+
+        Returns:
+            list: A list of string reference IDs that can be used to change
+                the status of the pipeline for this subject using XNAT's API,
+                or an empty list if the response holds no results.
+
+        Raises:
+            XnatException: If no AutoRun.xml pipeline instance is found or
+                the API response can't be parsed.
+        """
+        query_xml = """
+            <xdat:bundle
+                    xmlns:xdat="http://nrg.wustl.edu/security"
+                    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+                    ID="@wrk:workflowData"
+                    brief-description=""
+                    description=""
+                    allow-diff-columns="0"
+                    secure="false">
+                <xdat:root_element_name>wrk:workflowData</xdat:root_element_name>
+                <xdat:search_field>
+                    <xdat:element_name>wrk:workflowData</xdat:element_name>
+                    <xdat:field_ID>pipeline_name</xdat:field_ID>
+                    <xdat:sequence>0</xdat:sequence>
+                    <xdat:type>string</xdat:type>
+                    <xdat:header>wrk:workflowData/pipeline_name</xdat:header>
+                </xdat:search_field>
+                <xdat:search_field>
+                    <xdat:element_name>wrk:workflowData</xdat:element_name>
+                    <xdat:field_ID>wrk_workflowData_id</xdat:field_ID>
+                    <xdat:sequence>1</xdat:sequence>
+                    <xdat:type>string</xdat:type>
+                    <xdat:header>wrk:workflowData/wrk_workflowData_id</xdat:header>
+                </xdat:search_field>
+                <xdat:search_where method="AND">
+                    <xdat:criteria override_value_formatting="0">
+                        <xdat:schema_field>wrk:workflowData/ID</xdat:schema_field>
+                        <xdat:comparison_type>LIKE</xdat:comparison_type>
+                        <xdat:value>{exp_id}</xdat:value>
+                    </xdat:criteria>
+                    <xdat:criteria override_value_formatting="0">
+                        <xdat:schema_field>wrk:workflowData/ExternalID</xdat:schema_field>
+                        <xdat:comparison_type>=</xdat:comparison_type>
+                        <xdat:value>{project}</xdat:value>
+                    </xdat:criteria>
+                    <xdat:criteria override_value_formatting="0">
+                        <xdat:schema_field>wrk:workflowData/pipeline_name</xdat:schema_field>
+                        <xdat:comparison_type>=</xdat:comparison_type>
+                        <xdat:value>xnat_tools/AutoRun.xml</xdat:value>
+                    </xdat:criteria>
+                </xdat:search_where>
+            </xdat:bundle>
+        """.format(exp_id=self.id, project=self.project)  # noqa: E501
+
+        query_url = f"{xnat.server}/data/search?format=json"
+        response = xnat._make_xnat_post(query_url, data=query_xml)
+
+        if not response:
+            raise XnatException("AutoRun.xml pipeline not found.")
+
+        try:
+            found_pipelines = json.loads(response)
+        except json.JSONDecodeError:
+            raise XnatException("Can't decode workflow query response.")
+
+        try:
+            results = found_pipelines["ResultSet"]["Result"]
+        except KeyError:
+            return []
+
+        wf_ids = [item.get("workflow_id") for item in results]
+
+        return wf_ids
+
+    def get_resources(self, xnat_connection):
+        """
+        Returns a list of all resource URIs from this session.
+        """
+        resources = []
+        resource_ids = list(self.resource_IDs.values())
+        resource_ids.extend(self.misc_resource_IDs)
+        for r_id in resource_ids:
+            resource_list = xnat_connection.get_resource_list(
+                self.project, self.subject, self.name, r_id)
+            resources.extend([item["URI"] for item in resource_list])
+        return resources
+
+    def download(self, xnat, dest_folder, zip_name=None):
+        """
+        Download a zip file containing all data for this session. Returns the
+        path to the new file if download is successful, raises an exception if
+        not
+
+        Args:
+            xnat: An instance of datman.xnat.xnat()
+            dest_folder: The absolute path to the folder where the zip
+                should be deposited
+            zip_name: An optional name for the output zip file. If not
+                set the zip name will be session.name
+
+        """
+        resources_list = list(self.scan_resource_IDs)
+        resources_list.extend(self.misc_resource_IDs)
+        resources_list.extend(self.resource_IDs)
+
+        if not resources_list:
+            raise ValueError(f"No scans or resources found for {self.name}")
+
+        url = (f"{xnat.server}/REST/experiments/{self.id}/resources/"
+               f"{','.join(resources_list)}/files?structure=improved"
+               "&all=true&format=zip")
+
+        if not zip_name:
+            zip_name = self.name.upper() + ".zip"
+
+        output_path = os.path.join(dest_folder, zip_name)
+        if os.path.exists(output_path):
+            logger.error(
+                f"Cannot download {output_path}, file already exists.")
+            return output_path
+
+        xnat._get_xnat_stream(url, output_path)
+
+        return output_path
+
+    def assign_scan_names(self, config, ident):
+        """Assign a datman style name to each scan in this experiment.
+
+        This will populate the XNATScan.names and XNATScan.tags fields
+        for any scan that matches the study's export configuration.
+
+        Args:
+            config (:obj:`datman.config.config`): A config object for the
+                study this experiment belongs to.
+            ident (:obj:`datman.scanid.Identifier`): A valid ID to apply
+                to this experiment's data.
+        """
+        tags = config.get_tags(site=ident.site)
+        if not tags.series_map:
+            logger.error(
+                f"Failed to get tag export info for study {config.study_name}"
+                f" and site {ident.site}")
+            return
+
+        for scan in self.scans:
+            try:
+                scan.set_datman_name(str(ident), tags)
+            except Exception as e:
+                logger.info(
+                    f"Failed to make file name for series {scan.series} "
+                    f"in session {str(ident)}. Reason {type(e).__name__}: "
+                    f"{e}")
+
+    def is_shared(self) -> bool:
+        """Check if the experiment is shared from another project.
+        """
+        alt_names = self.get_alt_labels()
+        if not alt_names:
+            return False
+
+        return any([self.subject in label for label in alt_names])
+
+    def get_alt_labels(self):
+        """Find the names for all shared copies of the XNAT experiment.
+        """
+        shared = self._get_contents("sharing/share")
+        if not shared:
+            return []
+        return [item['data_fields']['label'] for item in shared[0]]
+
+    def __str__(self):
+        return f"<XNATExperiment {self.name}>"
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class XNATScan(SeriesImporter, XNATObject):
+    def __init__(self, experiment, scan_json):
+        self.project = experiment.project
+        self.subject = experiment.subject
+        self.experiment = experiment.name
+        self.shared = experiment.is_shared()
+        self.source_experiment = experiment.source_name
+        self.raw_json = scan_json
+        self.uid = self._get_field("UID")
+        self.series = self._get_field("ID")
+        self.image_type = self._get_field("parameters/imageType")
+        self.multiecho = self.is_multiecho()
+        self.description = self._set_description()
+        self.type = self._get_field("type")
+        self.names = []
+        self.tags = []
+        self.download_dir = None
+
+    # Use properties here to conform with SeriesImporter interface
+    # and guarantee at creation that expected attributes exist
+    @property
+    def series(self) -> str:
+        return self._series
+
+    @series.setter
+    def series(self, value: str):
+        self._series = value
+
+    @property
+    def subject(self) -> str:
+        return self._subject
+
+    @subject.setter
+    def subject(self, value: str):
+        self._subject = value
+
+    @property
+    def description(self) -> str:
+        return self._description
+
+    @description.setter
+    def description(self, value: str):
+        self._description = value
+
+    @property
+    def names(self) -> list[str]:
+        return self._names
+
+    @names.setter
+    def names(self, value: list[str]):
+        self._names = value
+
+    def _set_description(self):
+        series_descr = self._get_field("series_description")
+        if series_descr:
+            return series_descr
+        return self._get_field("type")
+
+    def is_multiecho(self):
+        try:
+            child = self.raw_json["children"][0]["items"][0]
+        except (KeyError, IndexError):
+            return False
+        name = child["data_fields"].get("name")
+        if name and "MultiEcho" in name:
+            return True
+        return False
+
+    def raw_dicoms_exist(self):
+        for child in self.raw_json["children"]:
+            for item in child["items"]:
+                file_type = item["data_fields"].get("content")
+                if file_type == "RAW":
+                    return True
+        return False
+
+    def is_derived(self):
+        if not self.image_type:
+            logger.warning(
+                f"Image type could not be found for series {self.series}. "
+                "Assuming it's not derived.")
+            return False
+        if "DERIVED" in self.image_type:
+            return True
+        return False
+
+    def set_tag(self, tag_map):
+        matches = {}
+        for tag, pattern in tag_map.items():
+
+            if 'SeriesDescription' in pattern:
+                regex = pattern['SeriesDescription']
+                search_target = self.description
+            elif 'XnatType' in pattern:
+                regex = pattern['XnatType']
+                search_target = self.type
+            else:
+                raise KeyError(
+                    "Missing keys 'SeriesDescription' or 'XnatType'"
+                    " for Pattern!")
+
+            if isinstance(regex, list):
+                regex = "|".join(regex)
+            if re.search(regex, search_target, re.IGNORECASE):
+                matches[tag] = pattern
+
+        if len(matches) == 1 or (len(matches) == 2 and self.multiecho):
+            self.tags = list(matches.keys())
+            return matches
+        return self._set_fmap_tag(tag_map, matches)
+
+    def _set_fmap_tag(self, tag_map, matches):
+        try:
+            for tag, pattern in tag_map.items():
+                if tag in matches:
+                    if not re.search(pattern["ImageType"], self.image_type):
+                        del matches[tag]
+        except Exception:
+            matches = {}
+
+        if len(matches) > 2 or (len(matches) == 2 and not self.multiecho):
+            matches = {}
+        self.tags = list(matches.keys())
+        return matches
+
+    def set_datman_name(self, base_name, tags):
+        mangled_descr = self._mangle_descr()
+        padded_series = self.series.zfill(2)
+        tag_settings = self.set_tag(tags.series_map)
+        if not tag_settings:
+            raise ParseException(
+                f"Can't identify tag for series {self.series}")
+        names = []
+        self.echo_dict = {}
+        for tag in tag_settings:
+            name = "_".join([base_name, tag, padded_series, mangled_descr])
+            if self.multiecho:
+                echo_num = tag_settings[tag]["EchoNumber"]
+                if echo_num not in self.echo_dict:
+                    self.echo_dict[echo_num] = name
+            names.append(name)
+
+        if len(self.tags) > 1 and not self.multiecho:
+            logger.error(f"Multiple export patterns match for {base_name}, "
+                         f"descr: {self.description}, tags: {self.tags}")
+            names = []
+            self.tags = []
+
+        self.names = names
+        return names
+
+    def _mangle_descr(self):
+        if not self.description:
+            return ""
+        return re.sub(r"[^a-zA-Z0-9.+]+", "-", self.description)
+
+    def is_usable(self, strict=False):
+        if not self.raw_dicoms_exist():
+            logger.debug(f"Ignoring {self.series} for {self.experiment}. "
+                         f"No RAW dicoms exist.")
+            return False
+
+        if not self.description:
+            logger.error(f"Can't find description for series {self.series} "
+                         f"from session {self.experiment}.")
+            return False
+
+        if not strict:
+            return True
+
+        if self.is_derived():
+            logger.debug(
+                f"Series {self.series} in session {self.experiment} is a "
+                "derived scan. Ignoring.")
+            return False
+
+        if not self.names:
+            return False
+
+        return True
+
+    def download(self, xnat_conn, output_dir):
+        """Download all dicoms for this series.
+
+        This will download all files in the series, and if successful,
+        set the download_dir attribute to the destination folder.
+
+        Args:
+            xnat_conn (:obj:`datman.xnat.xnat`): An open xnat connection
+                to the server to download from.
+            output_dir (:obj:`str`): The full path to the location to
+                download all files to.
+
+        Returns:
+            bool: True if the series was downloaded, False otherwise.
+        """
+        logger.info(f"Downloading dicoms for {self.experiment} series: "
+                    f"{self.series}.")
+
+        if self.download_dir:
+            logger.debug(
+                "Data has been previously downloaded, skipping redownload.")
+            return True
+
+        try:
+            dicom_zip = xnat_conn.get_dicom(self.project, self.subject,
+                                            self.experiment, self.series)
+        except Exception as e:
+            logger.error(f"Failed to download dicom archive for {self.subject}"
+                         f" series {self.series}. Reason - {e}")
+            return False
+
+        if os.path.getsize(dicom_zip) == 0:
+            logger.error(
+                f"Server returned an empty file for series {self.series} in "
+                f"session {self.experiment}. This may be a server error."
+            )
+            os.remove(dicom_zip)
+            return False
+
+        logger.info(f"Unpacking archive {dicom_zip}")
+
+        try:
+            with ZipFile(dicom_zip, "r") as fh:
+                fh.extractall(output_dir)
+        except Exception as e:
+            logger.error("An error occurred unpacking dicom archive for "
+                         f"{self.experiment}'s series {self.series}' - {e}")
+            os.remove(dicom_zip)
+            return False
+        else:
+            logger.info("Unpacking complete. Deleting archive file "
+                        f"{dicom_zip}")
+            os.remove(dicom_zip)
+
+        if self.shared:
+            self._fix_download_name(output_dir)
+
+        dicom_file = self._find_first_dicom(output_dir)
+
+        try:
+            self.download_dir = os.path.dirname(dicom_file)
+        except TypeError:
+            logger.warning("No valid dicom files found in XNAT session "
+                           f"{self.subject} series {self.series}.")
+            return False
+        return True
+
+    def _find_first_dicom(self, download_dir):
+        """Finds a dicom from the series (if any) in the given directory.
+
+        Args:
+            download_dir (:obj:`str`): The directory to search for dicoms.
+
+        Returns:
+            str: The full path to a dicom, or None if no readable dicoms
+                exist in the folder.
+        """
+        search_dir = self._find_series_dir(download_dir)
+        for root_dir, folder, files in os.walk(search_dir):
+            for item in files:
+                path = os.path.join(root_dir, item)
+                if is_dicom(path):
+                    return path
+
+    def _find_series_dir(self, search_dir):
+        """Find the directory a series was downloaded to, if any.
+
+        If multiple series are downloaded to the same temporary directory
+        this will search for the expected downloaded path of this scan.
+
+        Args:
+            search_dir (:obj:`str`): The full path to a directory to search.
+
+        Returns:
+            str: The full path to this scan's download location.
+        """
+        expected_path = os.path.join(search_dir, self.experiment, "scans")
+        found = glob.glob(os.path.join(expected_path, f"{self.series}-*"))
+        if not found:
+            return search_dir
+        if not os.path.exists(found[0]):
+            return search_dir
+        return found[0]
+
+    def _fix_download_name(self, output_dir):
+        """Rename a downloaded XNAT-shared scan to match the expected label.
+        """
+        orig_dir = os.path.join(output_dir, self.source_experiment)
+        try:
+            os.rename(orig_dir,
+                      orig_dir.replace(
+                          self.source_experiment,
+                          self.experiment))
+        except OSError:
+            for root, dirs, _ in os.walk(orig_dir):
+                for item in dirs:
+                    try:
+                        os.rename(os.path.join(root, item),
+                                  os.path.join(
+                                      root.replace(
+                                          self.source_experiment,
+                                          self.experiment),
+                                      item)
+                                  )
+                    except OSError:
+                        pass
+                    else:
+                        shutil.rmtree(orig_dir)
+                        return
+
+    def __str__(self):
+        return f"<XNATScan {self.experiment} - {self.series}>"
+
+    def __repr__(self):
+        return self.__str__()
+
+
+#############################################################################
+# Zip file classes
+
+
+class ZipImporter(SessionImporter):
+    """A scan session read from a zip file of dicoms."""
+
+    def __init__(self, zip_path):
+        self.path = zip_path
+        self.name = zip_path
+
+        # TODO: decide if archive read errors should be handled here.
+        headers = get_archive_headers(zip_path)
+        # Headers = dict[rel_path -> pydicom.dataset.FileDataset]
+        contents = {}
+        for path in headers:
+            dicom = headers[path]
+            # Can also use AcquisitionDate, SeriesDate (?)
+            contents[path] = {
+                'date': dicom.get('StudyDate'),
+                'description': dicom.get('SeriesDescription'),
+                'series': dicom.get('SeriesNumber')
+            }
+        # Only one date is needed, so use the first header that has one.
+        # TODO: confirm all headers agree?
+        dates = [item['date'] for item in contents.values() if item['date']]
+        self.date = dates[0] if dates else ""
+        # TODO: build ZipSeriesImporter objects from contents and add an
+        # equivalent of experiment.assign_scan_names(config, ident).
+        self.scans = []
+
+    # Use properties here to conform with SessionImporter interface
+    # and guarantee at creation that expected attributes exist
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @name.setter
+    def name(self, value: str):
+        self._name, _ = os.path.splitext(os.path.basename(value))
+
+    @property
+    def source_name(self) -> str:
+        # Zip files can't track shared IDs, so this always equals name.
+        return self.name
+
+    @source_name.setter
+    def source_name(self, value: str):
+        self.name = value
+
+    @property
+    def scans(self) -> list['SeriesImporter']:
+        return self._scans
+
+    @scans.setter
+    def scans(self, value: list['SeriesImporter']):
+        self._scans = value
+
+    @property
+    def date(self) -> str:
+        return self._date
+
+    @date.setter
+    def date(self, value: str):
+        value = str(value or "")
+        # Dicom StudyDate is normally YYYYMMDD; interface wants YYYY-MM-DD.
+        if re.fullmatch(r"\d{8}", value):
+            value = f"{value[:4]}-{value[4:6]}-{value[6:]}"
+        self._date = value
+
+    def is_shared(self) -> bool:
+        # Can't track shared sessions with zip files.
+        return False
+
+    def extract(self, dest_path: str) -> tuple[list[str], list[str]]:
+        """Unpack the zip file at the given location.
+
+        Args:
+            dest_path (str): The full path to the location to extract into.
+
+        Returns:
+            list, list: A list of paths to each series' folder and a list
+                of paths to non-scan files bundled with the session.
+
+        Raises:
+            InputException: If the zip lacks a leading parent folder.
+        """
+        # TODO: extract series only as needed, reading info before extracting.
+        with ZipFile(self.path, "r") as fh:
+            # Scan zips contain parent folder that holds all scan data.
+            # Grab it before extracting contents.
+            entries = fh.infolist()
+            if not entries or not entries[0].is_dir():
+                raise InputException("Malformed scan zip folder.")
+            scan_dir = os.path.join(dest_path, entries[0].filename)
+            fh.extractall(dest_path)
+
+        scans = []
+        resources = []
+        for item in glob.glob(os.path.join(scan_dir, "*")):
+            if os.path.isdir(item):
+                scans.append(item)
+            else:
+                resources.append(item)
+
+        return scans, resources
+
+
+class ZipSeriesImporter(SeriesImporter): pass  # TODO: implement  # noqa: E701
diff --git a/datman/xnat.py b/datman/xnat.py
index 0d00184f..e2bbbca9 100644
--- a/datman/xnat.py
+++ b/datman/xnat.py
@@ -1,23 +1,18 @@
 """Module to interact with the xnat server"""
 
 import getpass
-import glob
-import json
 import logging
 import os
-import re
 import tempfile
 import time
-import shutil
 import urllib.parse
-from abc import ABC
 from xml.etree import ElementTree
-from zipfile import ZipFile
 
 import requests
 
-from datman.exceptions import UndefinedSetting, XnatException, ParseException
-from datman.utils import is_dicom
+from datman.exceptions import UndefinedSetting, XnatException
+from datman.importers import XNATSubject, XNATExperiment, XNATScan
+
 
 logger = logging.getLogger(__name__)
 
@@ -1274,637 +1269,3 @@ def __str__(self):
 
     def __repr__(self):
         return self.__str__()
-
-
-class XNATObject(ABC):
-    def _get_field(self, key):
-        if not self.raw_json.get("data_fields"):
-            return ""
-        return self.raw_json["data_fields"].get(key, "")
-
-
-class XNATSubject(XNATObject):
-    def __init__(self, subject_json):
-        self.raw_json = subject_json
-        self.name = self._get_field("label")
-        self.project = self._get_field("project")
-        self.experiments = self._get_experiments()
-
-    def _get_experiments(self):
-        experiments = [
-            exp for exp in self.raw_json["children"]
-            if exp["field"] == "experiments/experiment"
-        ]
-
-        if not experiments:
-            logger.debug(f"No experiments found for {self.name}")
-            return {}
-
-        found = {}
-        for item in experiments[0]["items"]:
-            exper = XNATExperiment(self.project, self.name, item)
-            found[exper.name] = exper
-
-        return found
-
-    def __str__(self):
-        return f"<XNATSubject {self.name}>"
-
-    def __repr__(self):
-        return self.__str__()
-
-
-class XNATExperiment(XNATObject):
-    def __init__(self, project, subject_name, experiment_json):
-        self.raw_json = experiment_json
-        self.project = project
-        self.subject = subject_name
-        self.uid = self._get_field("UID")
-        self.id = self._get_field("ID")
-        self.date = self._get_field("date")
-
-        if self.is_shared():
-            self.name = [label for label in self.get_alt_labels()
-                         if self.subject in label][0]
-            self.source_name = self._get_field("label")
-        else:
-            self.name = self._get_field("label")
-            self.source_name = self.name
-
-        # Scan attributes
-        self.scans = self._get_scans()
-        self.scan_UIDs = self._get_scan_UIDs()
-        self.scan_resource_IDs = self._get_scan_rIDs()
-
-        # Resource attributes
-        self.resource_files = self._get_contents("resources/resource")
-        self.resource_IDs = self._get_resource_IDs()
-
-        # Misc - basically just OPT CU1 needs this
-        self.misc_resource_IDs = self._get_other_resource_IDs()
-
-    def _get_contents(self, data_type):
-        children = self.raw_json.get("children", [])
-
-        contents = [
-            child["items"] for child in children if child["field"] == data_type
-        ]
-        return contents
-
-    def _get_scans(self):
-        scans = self._get_contents("scans/scan")
-        if not scans:
-            logger.debug(f"No scans found for experiment {self.name}")
-            return scans
-        xnat_scans = []
-        for scan_json in scans[0]:
-            xnat_scans.append(XNATScan(self, scan_json))
-        return xnat_scans
-
-    def _get_scan_UIDs(self):
-        return [scan.uid for scan in self.scans]
-
-    def _get_scan_rIDs(self):
-        # These can be used to download a series from xnat
-        resource_ids = []
-        for scan in self.scans:
-            for child in scan.raw_json["children"]:
-                if child["field"] != "file":
-                    continue
-                for item in child["items"]:
-                    try:
-                        label = item["data_fields"]["label"]
-                    except KeyError:
-                        continue
-                    if label != "DICOM":
-                        continue
-                    r_id = item["data_fields"]["xnat_abstractresource_id"]
-                    resource_ids.append(str(r_id))
-        return resource_ids
-
-    def _get_resource_IDs(self):
-        if not self.resource_files:
-            return {}
-
-        resource_ids = {}
-        for resource in self.resource_files[0]:
-            label = resource["data_fields"].get("label", "No Label")
-            resource_ids[label] = str(
-                resource["data_fields"]["xnat_abstractresource_id"])
-        return resource_ids
-
-    def _get_other_resource_IDs(self):
-        """
-        OPT's CU site uploads niftis to their server. These niftis are neither
-        classified as resources nor as scans so our code misses them entirely.
-        This functions grabs the abstractresource_id for these and
-        any other unique files aside from snapshots so they can be downloaded
-        """
-        r_ids = []
-        for scan in self.scans:
-            for child in scan.raw_json["children"]:
-                for file_upload in child["items"]:
-                    data_fields = file_upload["data_fields"]
-                    try:
-                        label = data_fields["label"]
-                    except KeyError:
-                        # Some entries don't have labels. Only hold some header
-                        # values. These are safe to ignore
-                        continue
-
-                    try:
-                        data_format = data_fields["format"]
-                    except KeyError:
-                        # Some entries have labels but no format... or neither
-                        if not label:
-                            # If neither, ignore. Should just be an entry
-                            # containing scan parameters, etc.
-                            continue
-                        data_format = label
-
-                    try:
-                        r_id = str(data_fields["xnat_abstractresource_id"])
-                    except KeyError:
-                        # Some entries have labels and/or a format but no
-                        # actual files and so no resource id. These can also be
-                        # safely ignored.
-                        continue
-
-                    # ignore DICOM, it's grabbed elsewhere. Ignore snapshots
-                    # entirely. Some things may not be labelled DICOM but may
-                    # be format 'DICOM' so that needs to be checked for too.
-                    if label != "DICOM" and (data_format != "DICOM"
-                                             and label != "SNAPSHOTS"):
-                        r_ids.append(r_id)
-        return r_ids
-
-    def get_autorun_ids(self, xnat):
-        """Find the ID(s) of the 'autorun.xml' workflow
-
-        XNAT has this obnoxious, on-by-default and seemingly impossible to
-        disable, 'workflow' called AutoRun.xml. It appears to do nothing other
-        than prevent certain actions (like renaming subjects/experiments) if
-        it is stuck in the running or queued state. This will grab the autorun
-        ID for this experiment so that it can be modified.
-
-        Sometimes more than one pipeline gets launched for a subject even
-        though the GUI only reports one. This will grab the ID for all of them.
-
-        Returns:
-            list: A list of string reference IDs that can be used to change
-                the status of the pipeline for this subject using XNAT's API,
-                or the empty string if the pipeline is not found.
-
-        Raises:
-            XnatException: If no AutoRun.xml pipeline instance is found or
-                the API response can't be parsed.
-        """
-        query_xml = """
-            <xdat:bundle
-                    xmlns:xdat="http://nrg.wustl.edu/security"
-                    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-                    ID="@wrk:workflowData"
-                    brief-description=""
-                    description=""
-                    allow-diff-columns="0"
-                    secure="false">
-                <xdat:root_element_name>wrk:workflowData</xdat:root_element_name>
-                <xdat:search_field>
-                    <xdat:element_name>wrk:workflowData</xdat:element_name>
-                    <xdat:field_ID>pipeline_name</xdat:field_ID>
-                    <xdat:sequence>0</xdat:sequence>
-                    <xdat:type>string</xdat:type>
-                    <xdat:header>wrk:workflowData/pipeline_name</xdat:header>
-                </xdat:search_field>
-                <xdat:search_field>
-                    <xdat:element_name>wrk:workflowData</xdat:element_name>
-                    <xdat:field_ID>wrk_workflowData_id</xdat:field_ID>
-                    <xdat:sequence>1</xdat:sequence>
-                    <xdat:type>string</xdat:type>
-                    <xdat:header>wrk:workflowData/wrk_workflowData_id</xdat:header>
-                </xdat:search_field>
-                <xdat:search_where method="AND">
-                    <xdat:criteria override_value_formatting="0">
-                        <xdat:schema_field>wrk:workflowData/ID</xdat:schema_field>
-                        <xdat:comparison_type>LIKE</xdat:comparison_type>
-                        <xdat:value>{exp_id}</xdat:value>
-                    </xdat:criteria>
-                    <xdat:criteria override_value_formatting="0">
-                        <xdat:schema_field>wrk:workflowData/ExternalID</xdat:schema_field>
-                        <xdat:comparison_type>=</xdat:comparison_type>
-                        <xdat:value>{project}</xdat:value>
-                    </xdat:criteria>
-                    <xdat:criteria override_value_formatting="0">
-                        <xdat:schema_field>wrk:workflowData/pipeline_name</xdat:schema_field>
-                        <xdat:comparison_type>=</xdat:comparison_type>
-                        <xdat:value>xnat_tools/AutoRun.xml</xdat:value>
-                    </xdat:criteria>
-                </xdat:search_where>
-            </xdat:bundle>
-        """.format(exp_id=self.id, project=self.project)  # noqa: E501
-
-        query_url = f"{xnat.server}/data/search?format=json"
-        response = xnat._make_xnat_post(query_url, data=query_xml)
-
-        if not response:
-            raise XnatException("AutoRun.xml pipeline not found.")
-
-        try:
-            found_pipelines = json.loads(response)
-        except json.JSONDecodeError:
-            raise XnatException("Can't decode workflow query response.")
-
-        try:
-            results = found_pipelines["ResultSet"]["Result"]
-        except KeyError:
-            return []
-
-        wf_ids = [item.get("workflow_id") for item in results]
-
-        return wf_ids
-
-    def get_resources(self, xnat_connection):
-        """
-        Returns a list of all resource URIs from this session.
-        """
-        resources = []
-        resource_ids = list(self.resource_IDs.values())
-        resource_ids.extend(self.misc_resource_IDs)
-        for r_id in resource_ids:
-            resource_list = xnat_connection.get_resource_list(
-                self.project, self.subject, self.name, r_id)
-            resources.extend([item["URI"] for item in resource_list])
-        return resources
-
-    def download(self, xnat, dest_folder, zip_name=None):
-        """
-        Download a zip file containing all data for this session. Returns the
-        path to the new file if download is successful, raises an exception if
-        not
-
-        Args:
-            xnat: An instance of datman.xnat.xnat()
-            dest_folder: The absolute path to the folder where the zip
-                should be deposited
-            zip_name: An optional name for the output zip file. If not
-                set the zip name will be session.name
-
-        """
-        resources_list = list(self.scan_resource_IDs)
-        resources_list.extend(self.misc_resource_IDs)
-        resources_list.extend(self.resource_IDs)
-
-        if not resources_list:
-            raise ValueError(f"No scans or resources found for {self.name}")
-
-        url = (f"{xnat.server}/REST/experiments/{self.id}/resources/"
-               f"{','.join(resources_list)}/files?structure=improved"
-               "&all=true&format=zip")
-
-        if not zip_name:
-            zip_name = self.name.upper() + ".zip"
-
-        output_path = os.path.join(dest_folder, zip_name)
-        if os.path.exists(output_path):
-            logger.error(
-                f"Cannot download {output_path}, file already exists.")
-            return output_path
-
-        xnat._get_xnat_stream(url, output_path)
-
-        return output_path
-
-    def assign_scan_names(self, config, ident):
-        """Assign a datman style name to each scan in this experiment.
-
-        This will populate the XnatScan.names and XnatScan.tags fields
-        for any scan that matches the study's export configuration.
-
-        Args:
-            config (:obj:`datman.config.config`): A config object for the
-                study this experiment belongs to.
-            ident (:obj:`datman.scanid.Identifier`): A valid ID to apply
-                to this experiment's data.
-        """
-        tags = config.get_tags(site=ident.site)
-        if not tags.series_map:
-            logger.error(
-                f"Failed to get tag export info for study {config.study_name}"
-                f" and site {ident.site}")
-            return
-
-        for scan in self.scans:
-            try:
-                scan.set_datman_name(str(ident), tags)
-            except Exception as e:
-                logger.info(
-                    f"Failed to make file name for series {scan.series} "
-                    f"in session {str(ident)}. Reason {type(e).__name__}: "
-                    f"{e}")
-
-    def is_shared(self):
-        """Check if the experiment is shared from another project.
-        """
-        alt_names = self.get_alt_labels()
-        if not alt_names:
-            return False
-
-        return any([self.subject in label for label in alt_names])
-
-    def get_alt_labels(self):
-        """Find the names for all shared copies of the XNAT experiment.
-        """
-        shared = self._get_contents("sharing/share")
-        if not shared:
-            return []
-        return [item['data_fields']['label'] for item in shared[0]]
-
-    def __str__(self):
-        return f"<XNATExperiment {self.name}>"
-
-    def __repr__(self):
-        return self.__str__()
-
-
-class XNATScan(XNATObject):
-    def __init__(self, experiment, scan_json):
-        self.project = experiment.project
-        self.subject = experiment.subject
-        self.experiment = experiment.name
-        self.shared = experiment.is_shared()
-        self.source_experiment = experiment.source_name
-        self.raw_json = scan_json
-        self.uid = self._get_field("UID")
-        self.series = self._get_field("ID")
-        self.image_type = self._get_field("parameters/imageType")
-        self.multiecho = self.is_multiecho()
-        self.description = self._set_description()
-        self.type = self._get_field("type")
-        self.names = []
-        self.tags = []
-        self.download_dir = None
-
-    def _set_description(self):
-        series_descr = self._get_field("series_description")
-        if series_descr:
-            return series_descr
-        return self._get_field("type")
-
-    def is_multiecho(self):
-        try:
-            child = self.raw_json["children"][0]["items"][0]
-        except (KeyError, IndexError):
-            return False
-        name = child["data_fields"].get("name")
-        if name and "MultiEcho" in name:
-            return True
-        return False
-
-    def raw_dicoms_exist(self):
-        for child in self.raw_json["children"]:
-            for item in child["items"]:
-                file_type = item["data_fields"].get("content")
-                if file_type == "RAW":
-                    return True
-        return False
-
-    def is_derived(self):
-        if not self.image_type:
-            logger.warning(
-                f"Image type could not be found for series {self.series}. "
-                "Assuming it's not derived.")
-            return False
-        if "DERIVED" in self.image_type:
-            return True
-        return False
-
-    def set_tag(self, tag_map):
-        matches = {}
-        for tag, pattern in tag_map.items():
-
-            if 'SeriesDescription' in pattern:
-                regex = pattern['SeriesDescription']
-                search_target = self.description
-            elif 'XnatType' in pattern:
-                regex = pattern['XnatType']
-                search_target = self.type
-            else:
-                raise KeyError(
-                    "Missing keys 'SeriesDescription' or 'XnatType'"
-                    " for Pattern!")
-
-            if isinstance(regex, list):
-                regex = "|".join(regex)
-            if re.search(regex, search_target, re.IGNORECASE):
-                matches[tag] = pattern
-
-        if len(matches) == 1 or (len(matches) == 2 and self.multiecho):
-            self.tags = list(matches.keys())
-            return matches
-        return self._set_fmap_tag(tag_map, matches)
-
-    def _set_fmap_tag(self, tag_map, matches):
-        try:
-            for tag, pattern in tag_map.items():
-                if tag in matches:
-                    if not re.search(pattern["ImageType"], self.image_type):
-                        del matches[tag]
-        except Exception:
-            matches = {}
-
-        if len(matches) > 2 or (len(matches) == 2 and not self.multiecho):
-            matches = {}
-        self.tags = list(matches.keys())
-        return matches
-
-    def set_datman_name(self, base_name, tags):
-        mangled_descr = self._mangle_descr()
-        padded_series = self.series.zfill(2)
-        tag_settings = self.set_tag(tags.series_map)
-        if not tag_settings:
-            raise ParseException(
-                f"Can't identify tag for series {self.series}")
-        names = []
-        self.echo_dict = {}
-        for tag in tag_settings:
-            name = "_".join([base_name, tag, padded_series, mangled_descr])
-            if self.multiecho:
-                echo_num = tag_settings[tag]["EchoNumber"]
-                if echo_num not in self.echo_dict:
-                    self.echo_dict[echo_num] = name
-            names.append(name)
-
-        if len(self.tags) > 1 and not self.multiecho:
-            logger.error(f"Multiple export patterns match for {base_name}, "
-                         f"descr: {self.description}, tags: {self.tags}")
-            names = []
-            self.tags = []
-
-        self.names = names
-        return names
-
-    def _mangle_descr(self):
-        if not self.description:
-            return ""
-        return re.sub(r"[^a-zA-Z0-9.+]+", "-", self.description)
-
-    def is_usable(self, strict=False):
-        if not self.raw_dicoms_exist():
-            logger.debug(f"Ignoring {self.series} for {self.experiment}. "
-                         f"No RAW dicoms exist.")
-            return False
-
-        if not self.description:
-            logger.error(f"Can't find description for series {self.series} "
-                         f"from session {self.experiment}.")
-            return False
-
-        if not strict:
-            return True
-
-        if self.is_derived():
-            logger.debug(
-                f"Series {self.series} in session {self.experiment} is a "
-                "derived scan. Ignoring.")
-            return False
-
-        if not self.names:
-            return False
-
-        return True
-
-    def download(self, xnat_conn, output_dir):
-        """Download all dicoms for this series.
-
-        This will download all files in the series, and if successful,
-        set the download_dir attribute to the destination folder.
-
-        Args:
-            xnat_conn (:obj:`datman.xnat.xnat`): An open xnat connection
-                to the server to download from.
-            output_dir (:obj:`str`): The full path to the location to
-                download all files to.
-
-        Returns:
-            bool: True if the series was downloaded, False otherwise.
-        """
-        logger.info(f"Downloading dicoms for {self.experiment} series: "
-                    f"{self.series}.")
-
-        if self.download_dir:
-            logger.debug(
-                "Data has been previously downloaded, skipping redownload.")
-            return True
-
-        try:
-            dicom_zip = xnat_conn.get_dicom(self.project, self.subject,
-                                            self.experiment, self.series)
-        except Exception as e:
-            logger.error(f"Failed to download dicom archive for {self.subject}"
-                         f" series {self.series}. Reason - {e}")
-            return False
-
-        if os.path.getsize(dicom_zip) == 0:
-            logger.error(
-                f"Server returned an empty file for series {self.series} in "
-                f"session {self.experiment}. This may be a server error."
-            )
-            os.remove(dicom_zip)
-            return False
-
-        logger.info(f"Unpacking archive {dicom_zip}")
-
-        try:
-            with ZipFile(dicom_zip, "r") as fh:
-                fh.extractall(output_dir)
-        except Exception as e:
-            logger.error("An error occurred unpacking dicom archive for "
-                         f"{self.experiment}'s series {self.series}' - {e}")
-            os.remove(dicom_zip)
-            return False
-        else:
-            logger.info("Unpacking complete. Deleting archive file "
-                        f"{dicom_zip}")
-            os.remove(dicom_zip)
-
-        if self.shared:
-            self._fix_download_name(output_dir)
-
-        dicom_file = self._find_first_dicom(output_dir)
-
-        try:
-            self.download_dir = os.path.dirname(dicom_file)
-        except TypeError:
-            logger.warning("No valid dicom files found in XNAT session "
-                           f"{self.subject} series {self.series}.")
-            return False
-        return True
-
-    def _find_first_dicom(self, download_dir):
-        """Finds a dicom from the series (if any) in the given directory.
-
-        Args:
-            download_dir (:obj:`str`): The directory to search for dicoms.
-
-        Returns:
-            str: The full path to a dicom, or None if no readable dicoms
-                exist in the folder.
-        """
-        search_dir = self._find_series_dir(download_dir)
-        for root_dir, folder, files in os.walk(search_dir):
-            for item in files:
-                path = os.path.join(root_dir, item)
-                if is_dicom(path):
-                    return path
-
-    def _find_series_dir(self, search_dir):
-        """Find the directory a series was downloaded to, if any.
-
-        If multiple series are downloaded to the same temporary directory
-        this will search for the expected downloaded path of this scan.
-
-        Args:
-            search_dir (:obj:`str`): The full path to a directory to search.
-
-        Returns:
-            str: The full path to this scan's download location.
-        """
-        expected_path = os.path.join(search_dir, self.experiment, "scans")
-        found = glob.glob(os.path.join(expected_path, f"{self.series}-*"))
-        if not found:
-            return search_dir
-        if not os.path.exists(found[0]):
-            return search_dir
-        return found[0]
-
-    def _fix_download_name(self, output_dir):
-        """Rename a downloaded XNAT-shared scan to match the expected label.
-        """
-        orig_dir = os.path.join(output_dir, self.source_experiment)
-        try:
-            os.rename(orig_dir,
-                      orig_dir.replace(
-                          self.source_experiment,
-                          self.experiment))
-        except OSError:
-            for root, dirs, _ in os.walk(orig_dir):
-                for item in dirs:
-                    try:
-                        os.rename(os.path.join(root, item),
-                                  os.path.join(
-                                      root.replace(
-                                          self.source_experiment,
-                                          self.experiment),
-                                      item)
-                                  )
-                    except OSError:
-                        pass
-                    else:
-                        shutil.rmtree(orig_dir)
-                        return
-
-    def __str__(self):
-        return f"<XNATScan {self.experiment} - {self.series}>"
-
-    def __repr__(self):
-        return self.__str__()

From fd53d07ef6cb9cdfeb20da86a1f271f44da85ba8 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Tue, 4 Mar 2025 21:57:15 -0500
Subject: [PATCH 02/45] [ENH] Add missing methods to zip importers. Still need
 testing

---
 datman/importers.py | 281 ++++++++++++++++++++++++++++++--------------
 1 file changed, 192 insertions(+), 89 deletions(-)

diff --git a/datman/importers.py b/datman/importers.py
index f6d99628..88618a0b 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -76,6 +76,34 @@ def is_shared(self) -> bool:
         """
         pass
 
+    def assign_scan_names(self, config, ident):
+        """Assign a datman style name to each scan in this experiment.
+
+        This will populate the names and tags fields for any scan that
+        matches the study's export configuration.
+
+        Args:
+            config (:obj:`datman.config.config`): A config object for the
+                study this experiment belongs to.
+            ident (:obj:`datman.scanid.Identifier`): A valid ID to apply
+                to this experiment's data.
+        """
+        tags = config.get_tags(site=ident.site)
+        if not tags.series_map:
+            logger.error(
+                f"Failed to get tag export info for study {config.study_name}"
+                f" and site {ident.site}")
+            return
+
+        for scan in self.scans:
+            try:
+                scan.set_datman_name(str(ident), tags)
+            except Exception as e:
+                logger.info(
+                    f"Failed to make file name for series {scan.series} "
+                    f"in session {str(ident)}. Reason {type(e).__name__}: "
+                    f"{e}")
+
 
 class SeriesImporter(ABC):
     # XNATScan attributes and methods used by exporters...
@@ -115,6 +143,7 @@ def subject(self) -> str:
     def description(self) -> str:
         """The series description (as from the dicom headers).
         """
+        pass
 
     @property
     @abstractmethod
@@ -123,6 +152,13 @@ def names(self) -> list[str]:
         """
         pass
 
+    def _mangle_descr(self) -> str:
+        """Modify a series description to remove non-alphanumeric characters.
+        """
+        if not self.description:
+            return ""
+        return re.sub(r"[^a-zA-Z0-9.+]+", "-", self.description)
+
 
 ###############################################################################
 #### XNAT classes, formerly in xnat.py
@@ -214,11 +250,11 @@ def source_name(self, value: str):
         self._source_name = value
 
     @property
-    def scans(self) -> list['XNATScan']:
+    def scans(self) -> list['SeriesImporter']:
         return self._scans
 
     @scans.setter
-    def scans(self, value: list['XNATScan']):
+    def scans(self, value: list['SeriesImporter']):
         self._scans = value
 
     @property
@@ -460,34 +496,6 @@ def download(self, xnat, dest_folder, zip_name=None):
 
         return output_path
 
-    def assign_scan_names(self, config, ident):
-        """Assign a datman style name to each scan in this experiment.
-
-        This will populate the XnatScan.names and XnatScan.tags fields
-        for any scan that matches the study's export configuration.
-
-        Args:
-            config (:obj:`datman.config.config`): A config object for the
-                study this experiment belongs to.
-            ident (:obj:`datman.scanid.Identifier`): A valid ID to apply
-                to this experiment's data.
-        """
-        tags = config.get_tags(site=ident.site)
-        if not tags.series_map:
-            logger.error(
-                f"Failed to get tag export info for study {config.study_name}"
-                f" and site {ident.site}")
-            return
-
-        for scan in self.scans:
-            try:
-                scan.set_datman_name(str(ident), tags)
-            except Exception as e:
-                logger.info(
-                    f"Failed to make file name for series {scan.series} "
-                    f"in session {str(ident)}. Reason {type(e).__name__}: "
-                    f"{e}")
-
     def is_shared(self) -> bool:
         """Check if the experiment is shared from another project.
         """
@@ -663,11 +671,6 @@ def set_datman_name(self, base_name, tags):
         self.names = names
         return names
 
-    def _mangle_descr(self):
-        if not self.description:
-            return ""
-        return re.sub(r"[^a-zA-Z0-9.+]+", "-", self.description)
-
     def is_usable(self, strict=False):
         if not self.raw_dicoms_exist():
             logger.debug(f"Ignoring {self.series} for {self.experiment}. "
@@ -836,31 +839,17 @@ def __repr__(self):
 
 class ZipImporter(SessionImporter):
 
-    def __init__(self, zip_path):
+    def __init__(self, ident, zip_path):
+        # Would be good to not need ident here...
+        self.ident = ident
         self.path = zip_path
         self.name = zip_path
 
-        # Does this need exception handling? Or allow calling class
-        # to do it?
-        headers = get_archive_headers(zip_path)
-        # Headers = dict[rel_path -> pydicom.dataset.FileDataset]
-        contents = {}
-        for path in headers:
-            dicom = headers[path]
-            # only need one date... but confirm all match? Or grab after
-            # constructing scan objects?
-            # Can also use AcquisitionDate, SeriesDate (?)
-            date = dicom.get('StudyDate')
-            series_description = dicom.get('SeriesDescription')
-            series = dicom.get('SeriesNumber')
-            contents[path] = {
-                'date': date,
-                'description': series_description,
-                'series': series
-            }
-        # Still need to construct the ZipSeriesImporter class
-        # and also a way of assigning names like
-        # experiment.assign_scan_names(config, ident) so truly interchangeable
+        self.contents = self.parse_contents()
+        self.scans = self.get_scans()
+        self.resources = self.contents['resources']
+
+        self.date = self.scans[0].datess
 
     # Use properties here to conform with SessionImporter interface
     # and guarantee at creation that expected attributes exist
@@ -883,20 +872,20 @@ def source_name(self, value: str):
         self.name = value
 
     @property
-    def scans(self) -> list['XNATScan']:
+    def scans(self) -> list['SeriesImporter']:
         return self._scans
 
     @scans.setter
-    def scans(self, value: list['XNATScan']):
+    def scans(self, value: list['SeriesImporter']):
         self._scans = value
 
-    # @property
-    # def date(self) -> str:
-    #     return self._date
+    @property
+    def date(self) -> str:
+        return self._date
 
-    # @date.setter
-    # def date(self, value: str):
-    #     self._date = value
+    @date.setter
+    def date(self, value: str):
+        self._date = value
 
     def is_shared(self) -> bool:
         # Can't track shared sessions with zip files.
@@ -907,35 +896,149 @@ def extract(self, dest_path: str) -> str:
 
         Args:
             dest_path (str): The full path to the location to extract into.
-
-        Returns:
-            list, list: A list of paths to each series' folder and a list
-                of paths to non-scan files bundled with the session.
         """
-        ##### May want to update this later to only extract series as needed
-        ##### but to grab all the folders and file info from the zip file
-        ##### before extract (I think we can read dicom headers in utils already)
-
+        for item in self.scans:
+            item.extract(dest_path)
+        self.extract_resources(dest_path)
 
+    def extract_resources(self, dest_path: str):
         with ZipFile(self.path, "r") as fh:
-            # Scan zips contain parent folder that holds all scan data.
-            # Grab it before extracting contents.
-            par_info = fh.filelist[0]
-            if par_info.is_dir():
-                scan_dir = os.path.join(dest_path, par_info.filename)
-            else:
-                raise InputException("Malformed scan zip folder.")
-            fh.extractall(dest_path)
+            for item in self.resources:
+                fh.extract(item, path=dest_path)
+
+    def parse_contents(self):
+        contents = {
+            'scans': {},
+            'resources': []
+        }
+        with ZipFile(self.path, "r") as fh:
+            par_dir = fh.filelist[0].filename.strip('/')
+            for item in fh.filelist[1:]:
+                if item.is_dir():
+                    contents['scans'].setdefault(item.filename.strip('/'), [])
+                else:
+                    folder, _ = os.path.split(item.filename)
+                    if folder == par_dir:
+                        contents['resources'].append(item.filename)
+                    else:
+                        contents['scans'].setdefault(folder, []).append(
+                            item.filename)
+        return contents
 
+    def get_scans(self):
+        # Headers = dict[rel_path -> pydicom.dataset.FileDataset]
+        headers = get_archive_headers(self.path)
         scans = []
-        resources = []
-        for item in glob.glob(os.path.join(scan_dir, "*")):
-            if os.path.isdir(item):
-                scans.append(item)
-            else:
-                resources.append(item)
+        for sub_path in headers:
+            # .get_full_subjectid may need to be changed for compatibility
+            scans.append(
+                ZipSeriesImporter(
+                    self.ident.get_full_subjectid(), self.path, sub_path,
+                    headers[sub_path], self.contents['scans'][sub_path]
+                )
+            )
+        return scans
+
+    def __str__(self):
+        return f"<ZipImporter {self.path}"
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class ZipSeriesImporter(SeriesImporter):
+
+    def __init__(self, subject, zip_file, dcm_dir, header, zip_items):
+        self.subject = subject
+        self.zip_file = zip_file
+        self.dcm_dir = dcm_dir
+        self.header = header
+        self.contents = zip_items
+        self.date = str(header.get('StudyDate'))
+        self.series = str(header.get('SeriesNumber'))
+        self.description = str(header.get('SeriesDescription'))
+        self.uid = str(header.get('StudyInstanceUID'))
+        self.image_type = "////".join(header.get("ImageType"))
+        self.names = []
 
-        return scans, resources
+    # Use properties here to conform with SeriesImporter interface
+    # and guarantee at creation that expected attributes exist
+    @property
+    def series(self) -> str:
+        return self._series
 
+    @series.setter
+    def series(self, value: str):
+        self._series = value
+
+    @property
+    def subject(self) -> str:
+        return self._subject
+
+    @subject.setter
+    def subject(self, value: str):
+        self._subject = value
+
+    @property
+    def description(self) -> str:
+        return self._description
+
+    @description.setter
+    def description(self, value: str):
+        self._description = value
 
-def ZipSeriesImporter(SeriesImporter):
+    @property
+    def names(self) -> list[str]:
+        return self._names
+
+    @names.setter
+    def names(self, value: list[str]):
+        self._names = value
+
+    def extract(self, output_dir: str):
+        with ZipFile(self.zip_file, "r") as fh:
+            for item in self.contents:
+                fh.extract(item, path=output_dir)
+        self.download_dir = os.path.join(output_dir, self.dcm_dir)
+
+    def set_datman_name(self, base_name: str, tags: 'datman.config.TagInfo'
+            ) -> list[str]:
+        mangled_descr = self._mangle_descr()
+        tag_settings = self.set_tag(tags.series_map)
+        if not tag_settings:
+            raise ParseException(
+                f"Can't identify tag for series {self.series}")
+
+        names = []
+        for tag in tag_settings:
+            names.append(
+                "_".join([base_name, tag, self.series.zfill(2), mangled_descr])
+            )
+
+        self.names = names
+        return names
+
+    def set_tag(self, tag_map):
+        matches = {}
+        for tag, pattern in tag_map.items():
+            if 'SeriesDescription' not in pattern:
+                raise KeyError(
+                    "Missing key 'SeriesDescription' for 'Pattern'!")
+
+            regex = pattern['SeriesDescription']
+            if isinstance(regex, list):
+                regex = "|".join(regex)
+
+            if re.search(regex, self.description, re.IGNORECASE):
+                matches[tag] = pattern
+
+        if (len(matches) == 1 or
+                all(['EchoNumber' in matches[tag] for tag in matches])):
+            self.tags = list(matches.keys())
+            return matches
+
+    def __str__(self):
+        return f"<ZipSeriesImporter {self.series} - {self.description}>"
+
+    def __repr__(self):
+        return self.__str__()
\ No newline at end of file

From f6ce3abf96aeb5136d82fd95f5d900114a61dde9 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Mon, 10 Mar 2025 22:45:40 -0400
Subject: [PATCH 03/45] [ENH] Update extract to allow zip files to be used as
 input

---
 bin/dm_xnat_extract.py | 125 ++++++++++++++++++++++++++++----------
 datman/exporters.py    |   4 +-
 datman/importers.py    | 133 +++++++++++++++++++++++++++++++----------
 3 files changed, 197 insertions(+), 65 deletions(-)

diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py
index 2268c67b..fdb10e40 100755
--- a/bin/dm_xnat_extract.py
+++ b/bin/dm_xnat_extract.py
@@ -44,15 +44,18 @@
 
 """
 from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
+import glob
 import logging
 import os
 import platform
 import shutil
 import sys
+from zipfile import BadZipFile
 
 import datman.config
 import datman.exceptions
 import datman.exporters
+import datman.importers
 import datman.scan
 import datman.scanid
 import datman.xnat
@@ -137,30 +140,20 @@ def main():
     else:
         bids_opts = None
 
-    auth = datman.xnat.get_auth(args.username) if args.username else None
+    sessions = get_sessions(config, args)
 
-    if args.experiment:
-        experiments = collect_experiment(
-            config, args.experiment, args.study, auth=auth, url=args.server)
-    else:
-        experiments = collect_all_experiments(
-            config, auth=auth, url=args.server)
+    logger.info(f"Found {len(session)} sessions for study {args.study}")
 
-    logger.info(f"Found {len(experiments)} experiments for study {args.study}")
+    for xnat, importer in sessions:
+        session = datman.scan.Scan(importer._ident, config,
+                                   bids_root=args.bids_out)
 
-    for xnat, project, ident in experiments:
-        xnat_experiment = get_xnat_experiment(xnat, project, ident)
-        if not xnat_experiment:
-            continue
-
-        session = datman.scan.Scan(ident, config, bids_root=args.bids_out)
-
-        if xnat_experiment.resource_files:
-            export_resources(session.resource_path, xnat, xnat_experiment,
+        if importer.resource_files:
+            export_resources(session.resource_path, xnat, importer,
                              dry_run=args.dry_run)
 
-        if xnat_experiment.scans:
-            export_scans(config, xnat, xnat_experiment, session,
+        if importer.scans:
+            export_scans(config, xnat, importer, session,
                          bids_opts=bids_opts, dry_run=args.dry_run,
                          ignore_db=args.dont_update_dashboard,
                          wanted_tags=args.tag)
@@ -186,7 +179,7 @@ def _is_file(path, parser):
     )
 
     g_main = parser.add_argument_group(
-        "Options for choosing data from XNAT to extract"
+        "Options for choosing data to extract"
     )
     g_main.add_argument(
         "study",
@@ -234,6 +227,12 @@ def _is_file(path, parser):
         "--use-dcm2bids", action="store_true", default=False,
         help="Pull xnat data and convert to bids using dcm2bids"
     )
+    g_main.add_argument(
+        "--use-zips", action="store", metavar="ZIP_DIR",
+        nargs="?", default="USE_XNAT",
+        help="A directory of zip files to use instead of pulling from XNAT. "
+             "If not provided the study's 'dicom' dir will be used instead."
+    )
 
     g_dcm2bids = parser.add_argument_group(
         "Options for using dcm2bids"
@@ -335,6 +334,55 @@ def configure_logging(study, log_level):
     logging.getLogger('datman.exporters').addHandler(ch)
 
 
+def get_sessions(config, args):
+    if args.use_zips != "USE_XNAT":
+        return collect_zips(config, args)
+
+    auth = datman.xnat.get_auth(args.username) if args.username else None
+
+    if args.experiment:
+        return collect_experiment(
+            config, args.experiment, args.study, auth=auth, url=args.server)
+
+    return collect_all_experiments(config, auth=auth, url=args.server)
+
+
+def collect_zips(config, args):
+    if args.use_zips is None:
+        zip_folder = config.get_path("dicom")
+    else:
+        zip_folder = args.use_zips
+
+    if not os.path.exists(zip_folder):
+        logger.error(f"Zip file directory not found: {zip_folder}")
+        return []
+
+    if args.experiment:
+        ident = get_identifier(config, args.experiment)
+        if not ident:
+            logger.error(f"Invalid session ID {args.experiment}.")
+            return []
+
+        zip_path = os.path.join(zip_folder, str(ident) + ".zip")
+        if not os.path.exists(zip_path):
+            logger.error(f"Zip file not found: {zip_path}")
+            return
+
+        return [None, datman.importers.ZipImporter(ident, zip_path)]
+
+    zip_files = []
+    for zip_path in glob.glob(os.path.join(zip_folder, "*.zip")):
+        sess_name = os.path.basename(zip_path).replace(".zip", "")
+        ident = get_identifier(config, sess_name)
+        if not ident:
+            logger.error(
+                f"Ignoring invalid zip file name in dicom dir: {sess_name}")
+            continue
+        zip_files.append([None, datman.importers.ZipImporter(ident, zip_path)])
+
+    return zip_files
+
+
 def collect_experiment(config, experiment_id, study, url=None, auth=None):
     ident = get_identifier(config, experiment_id)
     xnat = datman.xnat.get_connection(
@@ -349,6 +397,10 @@ def collect_experiment(config, experiment_id, study, url=None, auth=None):
                      f"Ensure it matches an existing experiment ID.")
         return []
 
+    experiment = get_xnat_experiment(xnat, xnat_project, ident)
+    if not experiment:
+        return []
+
     return [(xnat, xnat_project, ident)]
 
 
@@ -445,9 +497,9 @@ def get_xnat_experiment(xnat, project, ident):
     return xnat_experiment
 
 
-def export_resources(resource_dir, xnat, xnat_experiment, dry_run=False):
-    logger.info(f"Extracting {len(xnat_experiment.resource_files)} resources "
-                f"from {xnat_experiment.name}")
+def export_resources(resource_dir, xnat, importer, dry_run=False):
+    logger.info(f"Extracting {len(importer.resource_files)} resources "
+                f"from {importer.name}")
 
     if not os.path.isdir(resource_dir):
         logger.info(f"Creating resources dir {resource_dir}")
@@ -457,6 +509,12 @@ def export_resources(resource_dir, xnat, xnat_experiment, dry_run=False):
             logger.error(f"Failed creating resources dir {resource_dir}")
             return
 
+    if isinstance(importer, datman.importers.ZipImporter):
+        importer.get_resources(resource_dir)
+        return
+
+    xnat_experiment = importer
+
     for label in xnat_experiment.resource_IDs:
         if label == "No Label":
             target_path = os.path.join(resource_dir, "MISC")
@@ -549,7 +607,7 @@ def download_resource(xnat, xnat_experiment, xnat_resource_id,
     return target_path
 
 
-def export_scans(config, xnat, xnat_experiment, session, bids_opts=None,
+def export_scans(config, xnat, importer, session, bids_opts=None,
                  wanted_tags=None, ignore_db=False, dry_run=False):
     """Export all XNAT data for a session to desired formats.
 
@@ -558,8 +616,9 @@ def export_scans(config, xnat, xnat_experiment, session, bids_opts=None,
             the study the experiment belongs to.
         xnat (:obj:`datman.xnat.xnat`): An XNAT connection for the server
             the experiment resides on.
-        xnat_experiment (:obj:`datman.xnat.XNATExperiment`): The experiment
-            to download, extract and export.
+        importer (:obj:`datman.importer.SessionImporter`): An instance of
+            a SessionImporter that holds all information needed to get
+            scans data.
         session (:obj:`datman.scan.Scan`): The datman session this experiment
             belongs to.
         bids_opts (:obj:`BidsOptions`, optional): dcm2bids settings to be
@@ -574,28 +633,28 @@ def export_scans(config, xnat, xnat_experiment, session, bids_opts=None,
     """
     logger.info(f"Processing scans in experiment {xnat_experiment.name}")
 
-    xnat_experiment.assign_scan_names(config, session._ident)
+    importer.assign_scan_names(config, session._ident)
 
     session_exporters = make_session_exporters(
-        config, session, xnat_experiment, bids_opts=bids_opts,
+        config, session, importer, bids_opts=bids_opts,
         ignore_db=ignore_db, dry_run=dry_run)
 
     series_exporters = make_all_series_exporters(
-        config, session, xnat_experiment, bids_opts=bids_opts,
+        config, session, importer, bids_opts=bids_opts,
         wanted_tags=wanted_tags, dry_run=dry_run
     )
 
     if not needs_export(session_exporters) and not series_exporters:
-        logger.debug(f"Session {xnat_experiment} already extracted. Skipping.")
+        logger.debug(f"Session {importer} already extracted. Skipping.")
         return
 
     with make_temp_directory(prefix="dm_xnat_extract_") as temp_dir:
-        for scan in xnat_experiment.scans:
+        for scan in importer.scans:
             if needs_download(scan, session_exporters, series_exporters):
-                scan.download(xnat, temp_dir)
+                scan.get_files(temp_dir, xnat)
 
             for exporter in series_exporters.get(scan, []):
-                exporter.export(scan.download_dir)
+                exporter.export(scan.dcm_dir)
 
         for exporter in session_exporters:
             try:
diff --git a/datman/exporters.py b/datman/exporters.py
index a78efee3..1bd77311 100644
--- a/datman/exporters.py
+++ b/datman/exporters.py
@@ -183,7 +183,7 @@ class BidsExporter(SessionExporter):
     type = "bids"
 
     def __init__(self, config, session, experiment, bids_opts=None, **kwargs):
-        self.exp_label = experiment.name
+        self.dcm_dir = experiment.dcm_dir
         self.bids_sub = session._ident.get_bids_name()
         self.bids_ses = session._ident.timepoint
         self.repeat = session._ident.session
@@ -379,7 +379,7 @@ def _get_scan_dir(self, download_dir):
                 f"sub-{self.bids_sub}_ses-{self.bids_ses}"
             )
             return tmp_dir
-        return os.path.join(download_dir, self.exp_label, "scans")
+        return os.path.join(download_dir, self.dcm_dir)
 
     def outputs_exist(self):
         if self.refresh:
diff --git a/datman/importers.py b/datman/importers.py
index 88618a0b..9716773c 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -5,6 +5,7 @@
 create the files and database contents users may actually interact with.
 """
 from abc import ABC, abstractmethod
+from datetime import datetime
 import glob
 import json
 import logging
@@ -70,6 +71,13 @@ def scans(self) -> list['SeriesImporter']:
         """
         pass
 
+    @property
+    @abstractmethod
+    def dcm_dir(self) -> str:
+        """The subfolder that will hold the session's dicom dirs.
+        """
+        pass
+
     @abstractmethod
     def is_shared(self) -> bool:
         """Indicates whether the session is shared with other projects.
@@ -116,6 +124,12 @@ class SeriesImporter(ABC):
     #   scan.download_dir
     #       xnat copy for example points to: /scratch/dawn/temp_stuff/export_zip/xnat_copy/SPN10_CMH_0083_01_SE01_MR/scans/6-t1_mprage_T1_900/resources/DICOM/files
     #       unzipped copy would be (diff session): 20190116_Ex09352_ASND1MR_ASQB002/Ex09352_Se00003_SagT1Bravo-1mm-32ch/
+    @property
+    @abstractmethod
+    def dcm_dir(self) -> str:
+        """Full path to the folder that holds a local copy of the dicom files.
+        """
+        pass
 
     @property
     @abstractmethod
@@ -152,6 +166,11 @@ def names(self) -> list[str]:
         """
         pass
 
+    @abstractmethod
+    def set_datman_name(self, ident: str, tags: 'datman.config.TagInfo'
+        ) -> list[str]:
+        pass
+
     def _mangle_descr(self) -> str:
         """Modify a series description to remove non-alphanumeric characters.
         """
@@ -219,6 +238,9 @@ def __init__(self, project, subject_name, experiment_json):
             self.name = self._get_field("label")
             self.source_name = self.name
 
+        # The subdirectory to find the dicoms in after download
+        self.dcm_dir = os.path.join(self.name, "scans")
+
         # Scan attributes
         self.scans = self._get_scans()
         self.scan_UIDs = self._get_scan_UIDs()
@@ -265,6 +287,14 @@ def date(self) -> str:
     def date(self, value: str):
         self._date = value
 
+    @property
+    def dcm_dir(self) -> str:
+        return self._dcm_dir
+
+    @dcm_dir.setter
+    def dcm_dir(self, value: str):
+        self._dcm_dir = value
+
     def _get_contents(self, data_type):
         children = self.raw_json.get("children", [])
 
@@ -458,16 +488,16 @@ def get_resources(self, xnat_connection):
             resources.extend([item["URI"] for item in resource_list])
         return resources
 
-    def download(self, xnat, dest_folder, zip_name=None):
+    def get_files(self, dest_folder, xnat, zip_name=None):
         """
         Download a zip file containing all data for this session. Returns the
         path to the new file if download is successful, raises an exception if
         not
 
         Args:
-            xnat: An instance of datman.xnat.xnat()
             dest_folder: The absolute path to the folder where the zip
                 should be deposited
+            xnat: An instance of datman.xnat.xnat()
             zip_name: An optional name for the output zip file. If not
                 set the zip name will be session.name
 
@@ -536,10 +566,18 @@ def __init__(self, experiment, scan_json):
         self.type = self._get_field("type")
         self.names = []
         self.tags = []
-        self.download_dir = None
+        self.dcm_dir = None
 
     # Use properties here to conform with SeriesImporter interface
     # and guarantee at creation that expected attributes exist
+    @property
+    def dcm_dir(self) ->str:
+        return self._dcm_dir
+
+    @dcm_dir.property
+    def dcm_dir(self, value: str):
+        self.dcm_dir = value
+
     @property
     def series(self) -> str:
         return self._series
@@ -696,17 +734,17 @@ def is_usable(self, strict=False):
 
         return True
 
-    def download(self, xnat_conn, output_dir):
+    def get_files(self, output_dir, xnat_conn):
         """Download all dicoms for this series.
 
         This will download all files in the series, and if successful,
-        set the download_dir attribute to the destination folder.
+        set the dcm_dir attribute to the destination folder.
 
         Args:
-            xnat_conn (:obj:`datman.xnat.xnat`): An open xnat connection
-                to the server to download from.
             output_dir (:obj:`str`): The full path to the location to
                 download all files to.
+            xnat_conn (:obj:`datman.xnat.xnat`): An open xnat connection
+                to the server to download from.
 
         Returns:
             bool: True if the series was downloaded, False otherwise.
@@ -714,7 +752,7 @@ def download(self, xnat_conn, output_dir):
         logger.info(f"Downloading dicoms for {self.experiment} series: "
                     f"{self.series}.")
 
-        if self.download_dir:
+        if self.dcm_dir:
             logger.debug(
                 "Data has been previously downloaded, skipping redownload.")
             return True
@@ -756,24 +794,24 @@ def download(self, xnat_conn, output_dir):
         dicom_file = self._find_first_dicom(output_dir)
 
         try:
-            self.download_dir = os.path.dirname(dicom_file)
+            self.dcm_dir = os.path.dirname(dicom_file)
         except TypeError:
             logger.warning("No valid dicom files found in XNAT session "
                            f"{self.subject} series {self.series}.")
             return False
         return True
 
-    def _find_first_dicom(self, download_dir):
+    def _find_first_dicom(self, dcm_dir):
         """Finds a dicom from the series (if any) in the given directory.
 
         Args:
-            download_dir (:obj:`str`): The directory to search for dicoms.
+            dcm_dir (:obj:`str`): The directory to search for dicoms.
 
         Returns:
             str: The full path to a dicom, or None if no readable dicoms
                 exist in the folder.
         """
-        search_dir = self._find_series_dir(download_dir)
+        search_dir = self._find_series_dir(dcm_dir)
         for root_dir, folder, files in os.walk(search_dir):
             for item in files:
                 path = os.path.join(root_dir, item)
@@ -841,15 +879,19 @@ class ZipImporter(SessionImporter):
 
     def __init__(self, ident, zip_path):
         # Would be good to not need ident here...
-        self.ident = ident
+        self._ident = ident
         self.path = zip_path
         self.name = zip_path
-
         self.contents = self.parse_contents()
         self.scans = self.get_scans()
         self.resources = self.contents['resources']
-
-        self.date = self.scans[0].datess
+        self.dcm_dir = os.path.split(self.scans[0].dcm_dir)[0]
+        try:
+            # Convert date to same format XNAT gives
+            self.date = str(datetime.strptime(self.scans[0].date, "%Y%m%d"))
+        except ValueError:
+            logger.error("Unexpected date format in dicom header.")
+            self.date = self.scans[0].date
 
     # Use properties here to conform with SessionImporter interface
     # and guarantee at creation that expected attributes exist
@@ -887,21 +929,29 @@ def date(self) -> str:
     def date(self, value: str):
         self._date = value
 
+    @property
+    def dcm_dir(self) -> str:
+        return self._dcm_dir
+
+    @dcm_dir.setter
+    def dcm_dir(self, value: str):
+        self._dcm_dir = value
+
     def is_shared(self) -> bool:
         # Can't track shared sessions with zip files.
         return False
 
-    def extract(self, dest_path: str) -> str:
+    def get_files(self, dest_path: str, *args) -> str:
         """Unpack the zip file at the given location.
 
         Args:
             dest_path (str): The full path to the location to extract into.
         """
         for item in self.scans:
-            item.extract(dest_path)
+            item.get_files(dest_path)
         self.extract_resources(dest_path)
 
-    def extract_resources(self, dest_path: str):
+    def get_resources(self, dest_path: str):
         with ZipFile(self.path, "r") as fh:
             for item in self.resources:
                 fh.extract(item, path=dest_path)
@@ -928,16 +978,39 @@ def parse_contents(self):
     def get_scans(self):
         # Headers = dict[rel_path -> pydicom.dataset.FileDataset]
         headers = get_archive_headers(self.path)
-        scans = []
+        # scans = []
+        # for sub_path in headers:
+        #     # .get_full_subjectid may need to be changed for compatibility
+        #     scans.append(
+        #         ZipSeriesImporter(
+        #             self.ident.get_full_subjectid(), self.path, sub_path,
+        #             headers[sub_path], self.contents['scans'][sub_path]
+        #         )
+        #     )
+        # return scans
+        scans = {}
+        duplicate_series = set()
         for sub_path in headers:
             # .get_full_subjectid may need to be changed for compatibility
-            scans.append(
-                ZipSeriesImporter(
-                    self.ident.get_full_subjectid(), self.path, sub_path,
+            zip_scan = ZipSeriesImporter(
+                    self._ident.get_full_subjectid(), self.path, sub_path,
                     headers[sub_path], self.contents['scans'][sub_path]
-                )
             )
-        return scans
+            if zip_scan.series in scans:
+                duplicate_series.add(zip_scan.series)
+            else:
+                scans[zip_scan.series] = zip_scan
+
+        # Omit scans when more than one has the same series num (can't handle
+        # these...)
+        if duplicate_series:
+            logger.error("Duplicate series present in zip file. "
+                         f"Ignoring: {duplicate_series}")
+
+        for series in duplicate_series:
+            del scans[series]
+
+        return list(scans.values())
 
     def __str__(self):
         return f"<ZipImporter {self.path}"
@@ -948,10 +1021,10 @@ def __repr__(self):
 
 class ZipSeriesImporter(SeriesImporter):
 
-    def __init__(self, subject, zip_file, dcm_dir, header, zip_items):
+    def __init__(self, subject, zip_file, series_dir, header, zip_items):
         self.subject = subject
         self.zip_file = zip_file
-        self.dcm_dir = dcm_dir
+        self.series_dir = series_dir
         self.header = header
         self.contents = zip_items
         self.date = str(header.get('StudyDate'))
@@ -995,11 +1068,11 @@ def names(self) -> list[str]:
     def names(self, value: list[str]):
         self._names = value
 
-    def extract(self, output_dir: str):
+    def get_files(self, output_dir: str, *args):
         with ZipFile(self.zip_file, "r") as fh:
             for item in self.contents:
                 fh.extract(item, path=output_dir)
-        self.download_dir = os.path.join(output_dir, self.dcm_dir)
+        self.dcm_dir = os.path.join(output_dir, self.series_dir)
 
     def set_datman_name(self, base_name: str, tags: 'datman.config.TagInfo'
             ) -> list[str]:
@@ -1041,4 +1114,4 @@ def __str__(self):
         return f"<ZipSeriesImporter {self.series} - {self.description}>"
 
     def __repr__(self):
-        return self.__str__()
\ No newline at end of file
+        return self.__str__()

From b5253e7eda4f60c4504390a7b19f2f2df8189e43 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Wed, 12 Mar 2025 13:16:42 -0400
Subject: [PATCH 04/45] [FIX] Bug fixes (name issues, repeated resource
 exports)

---
 bin/dm_xnat_extract.py | 16 ++++++++++------
 datman/exporters.py    |  1 +
 datman/importers.py    | 27 ++++++++++++++++++++++-----
 3 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py
index fdb10e40..ee2a9880 100755
--- a/bin/dm_xnat_extract.py
+++ b/bin/dm_xnat_extract.py
@@ -142,7 +142,7 @@ def main():
 
     sessions = get_sessions(config, args)
 
-    logger.info(f"Found {len(session)} sessions for study {args.study}")
+    logger.info(f"Found {len(sessions)} sessions for study {args.study}")
 
     for xnat, importer in sessions:
         session = datman.scan.Scan(importer._ident, config,
@@ -366,9 +366,9 @@ def collect_zips(config, args):
         zip_path = os.path.join(zip_folder, str(ident) + ".zip")
         if not os.path.exists(zip_path):
             logger.error(f"Zip file not found: {zip_path}")
-            return
+            return []
 
-        return [None, datman.importers.ZipImporter(ident, zip_path)]
+        return [(None, datman.importers.ZipImporter(ident, zip_path))]
 
     zip_files = []
     for zip_path in glob.glob(os.path.join(zip_folder, "*.zip")):
@@ -378,7 +378,9 @@ def collect_zips(config, args):
             logger.error(
                 f"Ignoring invalid zip file name in dicom dir: {sess_name}")
             continue
-        zip_files.append([None, datman.importers.ZipImporter(ident, zip_path)])
+        zip_files.append(
+            (None, datman.importers.ZipImporter(ident, zip_path))
+        )
 
     return zip_files
 
@@ -510,7 +512,9 @@ def export_resources(resource_dir, xnat, importer, dry_run=False):
             return
 
     if isinstance(importer, datman.importers.ZipImporter):
-        importer.get_resources(resource_dir)
+        for item in importer.resource_files:
+            if not os.path.exists(item):
+                importer.get_resources(resource_dir, item)
         return
 
     xnat_experiment = importer
@@ -631,7 +635,7 @@ def export_scans(config, xnat, importer, session, bids_opts=None,
         dry_run (bool, optional): If True, no outputs will be made. Defaults
             to False.
     """
-    logger.info(f"Processing scans in experiment {xnat_experiment.name}")
+    logger.info(f"Processing scans in experiment {importer.name}")
 
     importer.assign_scan_names(config, session._ident)
 
diff --git a/datman/exporters.py b/datman/exporters.py
index 1bd77311..339d8b3d 100644
--- a/datman/exporters.py
+++ b/datman/exporters.py
@@ -31,6 +31,7 @@
                           filter_niftis, find_tech_notes, read_blacklist,
                           get_relative_source, read_json, write_json)
 
+
 try:
     from dcm2bids import dcm2bids, Dcm2bids
     from dcm2bids.sidecar import Acquisition
diff --git a/datman/importers.py b/datman/importers.py
index 9716773c..68256776 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -574,9 +574,9 @@ def __init__(self, experiment, scan_json):
     def dcm_dir(self) ->str:
         return self._dcm_dir
 
-    @dcm_dir.property
+    @dcm_dir.setter
     def dcm_dir(self, value: str):
-        self.dcm_dir = value
+        self._dcm_dir = value
 
     @property
     def series(self) -> str:
@@ -885,10 +885,12 @@ def __init__(self, ident, zip_path):
         self.contents = self.parse_contents()
         self.scans = self.get_scans()
         self.resources = self.contents['resources']
-        self.dcm_dir = os.path.split(self.scans[0].dcm_dir)[0]
+        # For compatibility (fix later)
+        self.resource_files = self.resources
+        self.dcm_dir = os.path.split(self.scans[0].series_dir)[0]
         try:
             # Convert date to same format XNAT gives
-            self.date = str(datetime.strptime(self.scans[0].date, "%Y%m%d"))
+            self.date = str(datetime.strptime(self.scans[0].date, "%Y%m%d").date())
         except ValueError:
             logger.error("Unexpected date format in dicom header.")
             self.date = self.scans[0].date
@@ -951,8 +953,11 @@ def get_files(self, dest_path: str, *args) -> str:
             item.get_files(dest_path)
         self.extract_resources(dest_path)
 
-    def get_resources(self, dest_path: str):
+    def get_resources(self, dest_path: str, fname: str = None):
         with ZipFile(self.path, "r") as fh:
+            if fname:
+                fh.extract(fname, path=dest_path)
+                return
             for item in self.resources:
                 fh.extract(item, path=dest_path)
 
@@ -1033,9 +1038,18 @@ def __init__(self, subject, zip_file, series_dir, header, zip_items):
         self.uid = str(header.get('StudyInstanceUID'))
         self.image_type = "////".join(header.get("ImageType"))
         self.names = []
+        self.dcm_dir = None
 
     # Use properties here to conform with SeriesImporter interface
     # and guarantee at creation that expected attributes exist
+    @property
+    def dcm_dir(self) -> str:
+        return self._dcm_dir
+
+    @dcm_dir.setter
+    def dcm_dir(self, value):
+        self._dcm_dir = value
+
     @property
     def series(self) -> str:
         return self._series
@@ -1068,6 +1082,9 @@ def names(self) -> list[str]:
     def names(self, value: list[str]):
         self._names = value
 
+    def is_usable(self):
+        return any([item.endswith(".dcm") for item in self.contents])
+
     def get_files(self, output_dir: str, *args):
         with ZipFile(self.zip_file, "r") as fh:
             for item in self.contents:

From ab49032f274274274aa3bb6b5dbe93f9fd5e2a25 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Wed, 12 Mar 2025 19:42:20 -0400
Subject: [PATCH 05/45] [FIX] Handle headers with an empty image_type field

---
 datman/importers.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/datman/importers.py b/datman/importers.py
index 68256776..ffe44afc 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -1036,7 +1036,10 @@ def __init__(self, subject, zip_file, series_dir, header, zip_items):
         self.series = str(header.get('SeriesNumber'))
         self.description = str(header.get('SeriesDescription'))
         self.uid = str(header.get('StudyInstanceUID'))
-        self.image_type = "////".join(header.get("ImageType"))
+        try:
+            self.image_type = "////".join(header.get("ImageType"))
+        except TypeError:
+            self.image_type = ""
         self.names = []
         self.dcm_dir = None
 

From 390e8ff98e02a80968daf40f820257bf9c44a780 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Thu, 13 Mar 2025 16:05:07 -0400
Subject: [PATCH 06/45] [FIX] Allow an ident to be stored with the
 XNATExperiment importer

---
 bin/dm_xnat_extract.py | 12 ++++++++----
 datman/importers.py    |  4 +++-
 datman/xnat.py         | 27 ++++++++++++++++++++++-----
 3 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py
index ee2a9880..4d77d10d 100755
--- a/bin/dm_xnat_extract.py
+++ b/bin/dm_xnat_extract.py
@@ -403,7 +403,7 @@ def collect_experiment(config, experiment_id, study, url=None, auth=None):
     if not experiment:
         return []
 
-    return [(xnat, xnat_project, ident)]
+    return [(xnat, experiment)]
 
 
 def get_identifier(config, subid):
@@ -436,8 +436,11 @@ def collect_all_experiments(config, auth=None, url=None):
 
             for exper_id in xnat.get_experiment_ids(project):
                 ident = get_experiment_identifier(config, project, exper_id)
-                if ident:
-                    experiments.append((xnat, project, ident))
+                if not ident:
+                    continue
+                experiment = get_xnat_experiment(xnat, project, ident)
+                if experiment:
+                    experiments.append((xnat, experiment))
 
     return experiments
 
@@ -491,7 +494,8 @@ def get_xnat_experiment(xnat, project, ident):
 
     try:
         xnat_experiment = xnat.get_experiment(
-            project, ident.get_xnat_subject_id(), experiment_label)
+            project, ident.get_xnat_subject_id(), experiment_label,
+            ident=ident)
     except Exception as e:
         logger.error(f"Unable to retrieve experiment {experiment_label} from "
                      f"XNAT server. {type(e).__name__}: {e}")
diff --git a/datman/importers.py b/datman/importers.py
index ffe44afc..27943dc3 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -222,13 +222,15 @@ def __repr__(self):
 
 
 class XNATExperiment(SessionImporter, XNATObject):
-    def __init__(self, project, subject_name, experiment_json):
+    def __init__(self, project, subject_name, experiment_json,
+                 ident=None):
         self.raw_json = experiment_json
         self.project = project
         self.subject = subject_name
         self.uid = self._get_field("UID")
         self.id = self._get_field("ID")
         self.date = self._get_field("date")
+        self._ident = ident
 
         if self.is_shared():
             self.name = [label for label in self.get_alt_labels()
diff --git a/datman/xnat.py b/datman/xnat.py
index e2bbbca9..3cfdc7e1 100644
--- a/datman/xnat.py
+++ b/datman/xnat.py
@@ -10,7 +10,7 @@
 
 import requests
 
-from datman.exceptions import UndefinedSetting, XnatException
+from datman.exceptions import UndefinedSetting, XnatException, InputException
 from datman.importers import XNATSubject, XNATExperiment, XNATScan
 
 
@@ -439,24 +439,41 @@ def get_experiment_ids(self, project, subject=""):
 
         return [item.get("label") for item in result["ResultSet"]["Result"]]
 
-    def get_experiment(self, project, subject_id, exper_id, create=False):
+    def get_experiment(self, project, subject_id=None, exper_id=None,
+                       create=False, ident=None):
         """Get an experiment from the XNAT server.
 
         Args:
             project (:obj:`str`): The XNAT project to search within.
-            subject_id (:obj:`str`): The XNAT subject to search.
-            exper_id (:obj:`str`): The name of the experiment to retrieve.
+            subject_id (:obj:`str`, optional): The XNAT subject to search.
+                Either subject_id and exper_id must both be provided or
+                ident must be given.
+            exper_id (:obj:`str`, optional): The name of the experiment
+                to retrieve. Either subject_id and exper_id must both be
+                provided or ident must be given.
             create (bool, optional): Whether to create an experiment matching
                 exper_id if a match is not found. Defaults to False.
+            ident (:obj:`datman.scanid.Identifier`, optional): a datman
+                identifier. Must be provided if subject_id and exper_id are
+                not given.
 
         Raises:
             XnatException: If the experiment doesn't exist and can't be made
                 or the server/API can't be accessed.
+            InputException: If not given both subject_id and exper_id OR
+                ident as arguments.
 
         Returns:
             :obj:`datman.xnat.XNATExperiment`: An XNATExperiment instance
                 matching the given experiment ID.
         """
+        if not (subject_id and exper_id):
+            if not ident:
+                raise InputException(
+                    "Must be given either 1) subject ID and "
+                    "experiment ID or 2) A datman.scanid.Identifier")
+            subject_id = ident.get_xnat_subject_id()
+            exper_id = ident.get_xnat_experiment_id()
         logger.debug(
             f"Querying XNAT server {self.server} for experiment {exper_id} "
             f"belonging to {subject_id} in project {project}")
@@ -486,7 +503,7 @@ def get_experiment(self, project, subject_id, exper_id, create=False):
             raise XnatException(
                 f"Could not access metadata for experiment {exper_id}")
 
-        return XNATExperiment(project, subject_id, exper_json)
+        return XNATExperiment(project, subject_id, exper_json, ident=ident)
 
     def make_experiment(self, project, subject, experiment):
         """Make a new (empty) experiment on the XNAT server.

From 4240cfed7b9d0f48918677e55db7882f35dc91c2 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Thu, 13 Mar 2025 18:26:43 -0400
Subject: [PATCH 07/45] [FIX] Renamed SessionImporter dcm_dir -> dcm_subdir

SessionImporter and SeriesImporter both used dcm_dir but for
different functions (and with SeriesImporter sometimes being None).

Renamed SessionImporter variable to better reflect differences.
---
 datman/exporters.py |  2 +-
 datman/importers.py | 35 ++++++++++++++++++++++-------------
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/datman/exporters.py b/datman/exporters.py
index 339d8b3d..b0ed0cd5 100644
--- a/datman/exporters.py
+++ b/datman/exporters.py
@@ -184,7 +184,7 @@ class BidsExporter(SessionExporter):
     type = "bids"
 
     def __init__(self, config, session, experiment, bids_opts=None, **kwargs):
-        self.dcm_dir = experiment.dcm_dir
+        self.dcm_dir = experiment.dcm_subdir
         self.bids_sub = session._ident.get_bids_name()
         self.bids_ses = session._ident.timepoint
         self.repeat = session._ident.session
diff --git a/datman/importers.py b/datman/importers.py
index 27943dc3..4650025a 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -73,8 +73,10 @@ def scans(self) -> list['SeriesImporter']:
 
     @property
     @abstractmethod
-    def dcm_dir(self) -> str:
+    def dcm_subdir(self) -> str:
         """The subfolder that will hold the session's dicom dirs.
+
+        This will be a relative path, and will always be defined.
         """
         pass
 
@@ -128,6 +130,9 @@ class SeriesImporter(ABC):
     @abstractmethod
     def dcm_dir(self) -> str:
         """Full path to the folder that holds a local copy of the dicom files.
+
+        This should be None if the dicoms have not been retrieved from their
+        source location (e.g. with get_files).
         """
         pass
 
@@ -166,6 +171,10 @@ def names(self) -> list[str]:
         """
         pass
 
+    @abstractmethod
+    def is_usable(self) -> bool:
+        pass
+
     @abstractmethod
     def set_datman_name(self, ident: str, tags: 'datman.config.TagInfo'
         ) -> list[str]:
@@ -241,7 +250,7 @@ def __init__(self, project, subject_name, experiment_json,
             self.source_name = self.name
 
         # The subdirectory to find the dicoms in after download
-        self.dcm_dir = os.path.join(self.name, "scans")
+        self.dcm_subdir = os.path.join(self.name, "scans")
 
         # Scan attributes
         self.scans = self._get_scans()
@@ -290,12 +299,12 @@ def date(self, value: str):
         self._date = value
 
     @property
-    def dcm_dir(self) -> str:
-        return self._dcm_dir
+    def dcm_subdir(self) -> str:
+        return self._dcm_subdir
 
-    @dcm_dir.setter
-    def dcm_dir(self, value: str):
-        self._dcm_dir = value
+    @dcm_subdir.setter
+    def dcm_subdir(self, value: str):
+        self._dcm_subdir = value
 
     def _get_contents(self, data_type):
         children = self.raw_json.get("children", [])
@@ -889,7 +898,7 @@ def __init__(self, ident, zip_path):
         self.resources = self.contents['resources']
         # For compatibility (fix later)
         self.resource_files = self.resources
-        self.dcm_dir = os.path.split(self.scans[0].series_dir)[0]
+        self.dcm_subdir = os.path.split(self.scans[0].series_dir)[0]
         try:
             # Convert date to same format XNAT gives
             self.date = str(datetime.strptime(self.scans[0].date, "%Y%m%d").date())
@@ -934,12 +943,12 @@ def date(self, value: str):
         self._date = value
 
     @property
-    def dcm_dir(self) -> str:
-        return self._dcm_dir
+    def dcm_subdir(self) -> str:
+        return self._dcm_subdir
 
-    @dcm_dir.setter
-    def dcm_dir(self, value: str):
-        self._dcm_dir = value
+    @dcm_subdir.setter
+    def dcm_subdir(self, value: str):
+        self._dcm_subdir = value
 
     def is_shared(self) -> bool:
         # Can't track shared sessions with zip files.

From a8d0f34ab8432c05c2f606fa666bb525d99b801b Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Thu, 13 Mar 2025 20:38:00 -0400
Subject: [PATCH 08/45] [FIX] Rename some vars, add missing items to interface
 classes

---
 bin/dm_xnat_extract.py     |   2 +-
 bin/xnat_fetch_sessions.py |   2 +-
 datman/importers.py        | 205 ++++++++++++++++++++++++++-----------
 3 files changed, 148 insertions(+), 61 deletions(-)

diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py
index 4d77d10d..e0be9226 100755
--- a/bin/dm_xnat_extract.py
+++ b/bin/dm_xnat_extract.py
@@ -145,7 +145,7 @@ def main():
     logger.info(f"Found {len(sessions)} sessions for study {args.study}")
 
     for xnat, importer in sessions:
-        session = datman.scan.Scan(importer._ident, config,
+        session = datman.scan.Scan(importer.ident, config,
                                    bids_root=args.bids_out)
 
         if importer.resource_files:
diff --git a/bin/xnat_fetch_sessions.py b/bin/xnat_fetch_sessions.py
index 05c5b437..6a4a8acb 100755
--- a/bin/xnat_fetch_sessions.py
+++ b/bin/xnat_fetch_sessions.py
@@ -186,7 +186,7 @@ def update_needed(zip_file, experiment, xnat):
 
     zip_scan_uids = get_scan_uids(zip_headers)
     zip_resources = get_resources(zip_file)
-    xnat_resources = experiment.get_resources(xnat)
+    xnat_resources = experiment.get_resource_uris(xnat)
 
     if not files_downloaded(zip_resources, xnat_resources) or \
        not files_downloaded(zip_scan_uids, experiment.scan_UIDs):
diff --git a/datman/importers.py b/datman/importers.py
index 4650025a..33049ea9 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -38,6 +38,13 @@ class SessionImporter(ABC):
     #   experiment.resource_IDs (dict of folder names to numerical IDs)
     #           e.g. {'behav': '297528', 'misc': '305312'}
 
+    @property
+    @abstractmethod
+    def ident(self) -> 'datman.scanid.Identifier':
+        """A datman identifier for the session.
+        """
+        pass
+
     @property
     @abstractmethod
     def name(self) -> str:
@@ -67,7 +74,14 @@ def date(self) -> str:
     @property
     @abstractmethod
     def scans(self) -> list['SeriesImporter']:
-        """A list scan series that belong to the session.
+        """A list of scan SeriesImporters that belong to the session.
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def resource_files(self) -> list[str]:
+        """A list of relative paths for any resource (non-dcm) files.
         """
         pass
 
@@ -86,6 +100,12 @@ def is_shared(self) -> bool:
         """
         pass
 
+    @abstractmethod
+    def get_files(self, dest_dir, *args, **kwargs):
+        """Retrieve all of the session's dcm files and place them in dest_dir.
+        """
+        pass
+
     def assign_scan_names(self, config, ident):
         """Assign a datman style name to each scan in this experiment.
 
@@ -171,8 +191,31 @@ def names(self) -> list[str]:
         """
         pass
 
+    @property
+    @abstractmethod
+    def image_type(self) -> str:
+        """The ImageType from the dicom headers.
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def uid(self) -> str:
+        """The UID from the dicom headers.
+        """
+        pass
+
+
     @abstractmethod
     def is_usable(self) -> bool:
+        """Indicates whether the series contains usable dcm files.
+        """
+        pass
+
+    @abstractmethod
+    def get_files(self, dest_dir, *args, **kwargs):
+        """Retrieve dcm files for this series and store them in dest_dir.
+        """
         pass
 
     @abstractmethod
@@ -180,6 +223,10 @@ def set_datman_name(self, ident: str, tags: 'datman.config.TagInfo'
         ) -> list[str]:
         pass
 
+    @abstractmethod
+    def set_tag(self, tag_map):
+        pass
+
     def _mangle_descr(self) -> str:
         """Modify a series description to remove non-alphanumeric characters.
         """
@@ -187,6 +234,16 @@ def _mangle_descr(self) -> str:
             return ""
         return re.sub(r"[^a-zA-Z0-9.+]+", "-", self.description)
 
+    def is_derived(self):
+        if not self.image_type:
+            logger.warning(
+                f"Image type could not be found for series {self.series}. "
+                "Assuming it's not derived.")
+            return False
+        if "DERIVED" in self.image_type:
+            return True
+        return False
+
 
 ###############################################################################
 #### XNAT classes, formerly in xnat.py
@@ -239,7 +296,7 @@ def __init__(self, project, subject_name, experiment_json,
         self.uid = self._get_field("UID")
         self.id = self._get_field("ID")
         self.date = self._get_field("date")
-        self._ident = ident
+        self.ident = ident
 
         if self.is_shared():
             self.name = [label for label in self.get_alt_labels()
@@ -264,8 +321,14 @@ def __init__(self, project, subject_name, experiment_json,
         # Misc - basically just OPT CU1 needs this
         self.misc_resource_IDs = self._get_other_resource_IDs()
 
-    # Use properties here to conform with SessionImporter interface
-    # and guarantee at creation that expected attributes exist
+    @property
+    def ident(self) -> 'datman.scanid.Identifier':
+        return self._ident
+
+    @ident.setter
+    def ident(self, value: 'datman.scanid.Identifier'):
+        self._ident = value
+
     @property
     def name(self) -> str:
         return self._name
@@ -486,7 +549,7 @@ def get_autorun_ids(self, xnat):
 
         return wf_ids
 
-    def get_resources(self, xnat_connection):
+    def get_resource_uris(self, xnat_connection):
         """
         Returns a list of all resource URIs from this session.
         """
@@ -499,7 +562,7 @@ def get_resources(self, xnat_connection):
             resources.extend([item["URI"] for item in resource_list])
         return resources
 
-    def get_files(self, dest_folder, xnat, zip_name=None):
+    def get_files(self, dest_folder, xnat, *args, zip_name=None, **kwargs):
         """
         Download a zip file containing all data for this session. Returns the
         path to the new file if download is successful, raises an exception if
@@ -621,6 +684,14 @@ def names(self) -> list[str]:
     def names(self, value: list[str]):
         self._names = value
 
+    @property
+    def uid(self) -> str:
+        return self._uid
+
+    @uid.setter
+    def uid(self, value: str):
+        self._uid = value
+
     def _set_description(self):
         series_descr = self._get_field("series_description")
         if series_descr:
@@ -645,16 +716,6 @@ def raw_dicoms_exist(self):
                     return True
         return False
 
-    def is_derived(self):
-        if not self.image_type:
-            logger.warning(
-                f"Image type could not be found for series {self.series}. "
-                "Assuming it's not derived.")
-            return False
-        if "DERIVED" in self.image_type:
-            return True
-        return False
-
     def set_tag(self, tag_map):
         matches = {}
         for tag, pattern in tag_map.items():
@@ -745,7 +806,7 @@ def is_usable(self, strict=False):
 
         return True
 
-    def get_files(self, output_dir, xnat_conn):
+    def get_files(self, output_dir, xnat_conn, *args, **kwargs):
         """Download all dicoms for this series.
 
         This will download all files in the series, and if successful,
@@ -889,25 +950,23 @@ def __repr__(self):
 class ZipImporter(SessionImporter):
 
     def __init__(self, ident, zip_path):
-        # Would be good to not need ident here...
-        self._ident = ident
-        self.path = zip_path
+        self.ident = ident
         self.name = zip_path
+        self.path = zip_path
         self.contents = self.parse_contents()
         self.scans = self.get_scans()
-        self.resources = self.contents['resources']
-        # For compatibility (fix later)
-        self.resource_files = self.resources
+        self.date = self.scans[0].date
+        self.resource_files = self.contents['resources']
         self.dcm_subdir = os.path.split(self.scans[0].series_dir)[0]
-        try:
-            # Convert date to same format XNAT gives
-            self.date = str(datetime.strptime(self.scans[0].date, "%Y%m%d").date())
-        except ValueError:
-            logger.error("Unexpected date format in dicom header.")
-            self.date = self.scans[0].date
 
-    # Use properties here to conform with SessionImporter interface
-    # and guarantee at creation that expected attributes exist
+    @property
+    def ident(self) -> 'datman.scanid.Identifier':
+        return self._ident
+
+    @ident.setter
+    def ident(self, value: 'datman.scanid.Identifier'):
+        self._ident = value
+
     @property
     def name(self) -> str:
         return self._name
@@ -918,14 +977,27 @@ def name(self, value: str):
 
     @property
     def source_name(self) -> str:
-        # When using zip files, can't really track shared IDs so always
-        # equal name.
+        # When using zip files, can't really track shared IDs so it always
+        # equals name.
         return self.name
 
     @source_name.setter
     def source_name(self, value: str):
         self.name = value
 
+    @property
+    def date(self) -> str:
+        return self._date
+
+    @date.setter
+    def date(self, value: str):
+        try:
+            # Convert date from usual header format to expected date format
+            self._date = str(datetime.strptime(value, "%Y%m%d").date())
+        except ValueError:
+            logger.error(f"Unexpected date format given - {value}")
+            self._date = value
+
     @property
     def scans(self) -> list['SeriesImporter']:
         return self._scans
@@ -935,12 +1007,12 @@ def scans(self, value: list['SeriesImporter']):
         self._scans = value
 
     @property
-    def date(self) -> str:
-        return self._date
+    def resource_files(self) -> list[str]:
+        return self._resources
 
-    @date.setter
-    def date(self, value: str):
-        self._date = value
+    @resource_files.setter
+    def resource_files(self, value):
+        self._resources = value
 
     @property
     def dcm_subdir(self) -> str:
@@ -954,7 +1026,7 @@ def is_shared(self) -> bool:
         # Can't track shared sessions with zip files.
         return False
 
-    def get_files(self, dest_path: str, *args) -> str:
+    def get_files(self, dest_path: str, *args, **kwargs):
         """Unpack the zip file at the given location.
 
         Args:
@@ -965,14 +1037,21 @@ def get_files(self, dest_path: str, *args) -> str:
         self.extract_resources(dest_path)
 
     def get_resources(self, dest_path: str, fname: str = None):
+        """Unpack resource (non-dicom) files at the given location.
+
+        Args:
+            dest_path (str): The full path to the location to extract into.
+        """
         with ZipFile(self.path, "r") as fh:
             if fname:
                 fh.extract(fname, path=dest_path)
                 return
-            for item in self.resources:
+            for item in self.resource_files:
                 fh.extract(item, path=dest_path)
 
-    def parse_contents(self):
+    def parse_contents(self) -> dict:
+        """Read and organize the contents of the zip file.
+        """
         contents = {
             'scans': {},
             'resources': []
@@ -991,25 +1070,16 @@ def parse_contents(self):
                             item.filename)
         return contents
 
-    def get_scans(self):
-        # Headers = dict[rel_path -> pydicom.dataset.FileDataset]
+    def get_scans(self) -> list['ZipSeriesImporter']:
+        """Get ZipSeriesImporters for each scan in the session.
+        """
         headers = get_archive_headers(self.path)
-        # scans = []
-        # for sub_path in headers:
-        #     # .get_full_subjectid may need to be changed for compatibility
-        #     scans.append(
-        #         ZipSeriesImporter(
-        #             self.ident.get_full_subjectid(), self.path, sub_path,
-        #             headers[sub_path], self.contents['scans'][sub_path]
-        #         )
-        #     )
-        # return scans
         scans = {}
         duplicate_series = set()
         for sub_path in headers:
             # .get_full_subjectid may need to be changed for compatibility
             zip_scan = ZipSeriesImporter(
-                    self._ident.get_full_subjectid(), self.path, sub_path,
+                    self.ident.get_full_subjectid(), self.path, sub_path,
                     headers[sub_path], self.contents['scans'][sub_path]
             )
             if zip_scan.series in scans:
@@ -1047,10 +1117,7 @@ def __init__(self, subject, zip_file, series_dir, header, zip_items):
         self.series = str(header.get('SeriesNumber'))
         self.description = str(header.get('SeriesDescription'))
         self.uid = str(header.get('StudyInstanceUID'))
-        try:
-            self.image_type = "////".join(header.get("ImageType"))
-        except TypeError:
-            self.image_type = ""
+        self.image_type = header.get("ImageType")
         self.names = []
         self.dcm_dir = None
 
@@ -1096,10 +1163,30 @@ def names(self) -> list[str]:
     def names(self, value: list[str]):
         self._names = value
 
+    @property
+    def image_type(self) -> str:
+        return self._image_type
+
+    @image_type.setter
+    def image_type(self, value):
+        try:
+            # Ensure matches the expected XNAT format
+            self._image_type = "////".join(value)
+        except TypeError:
+            self._image_type = ""
+
+    @property
+    def uid(self) -> str:
+        return self._uid
+
+    @uid.setter
+    def uid(self, value: str):
+        self._uid = value
+
     def is_usable(self):
         return any([item.endswith(".dcm") for item in self.contents])
 
-    def get_files(self, output_dir: str, *args):
+    def get_files(self, output_dir: str, *args, **kwargs):
         with ZipFile(self.zip_file, "r") as fh:
             for item in self.contents:
                 fh.extract(item, path=output_dir)

From ac8cb2054c3c503125f641709871351b0a6ad6eb Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Thu, 13 Mar 2025 21:29:28 -0400
Subject: [PATCH 09/45] [CONF] Update pylint settings to ignore more messages

---
 pyproject.toml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 65a395a4..7ed546dd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -109,7 +109,12 @@ testpaths = ["tests"]
 fail-under = 7
 ignore-paths = ['docs']
 
+[tool.pylint.logging]
+logging-format-style = "new"
+
 [tool.pylint."messages control"]
 disable = [
-  "logging-format-interpolation"
+  "logging-format-interpolation",
+  "too-many-lines",
+  "logging-fstring-interpolation"
 ]

From e7e175da87cae0cc63bf2bdc616c16d1b71727ed Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Fri, 14 Mar 2025 21:16:24 -0400
Subject: [PATCH 10/45] [PEP8] Fix formatting issues

---
 bin/dm_xnat_extract.py     | 138 ++++++++++++--
 bin/dm_xnat_upload.py      |   7 +-
 bin/xnat_fetch_sessions.py |   6 +-
 datman/importers.py        | 308 ++++++++++++++++--------------
 datman/xnat.py             | 375 ++++++++++++++++++-------------------
 pyproject.toml             |   9 +-
 6 files changed, 498 insertions(+), 345 deletions(-)

diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py
index e0be9226..c672d311 100755
--- a/bin/dm_xnat_extract.py
+++ b/bin/dm_xnat_extract.py
@@ -50,7 +50,6 @@
 import platform
 import shutil
 import sys
-from zipfile import BadZipFile
 
 import datman.config
 import datman.exceptions
@@ -160,6 +159,8 @@ def main():
 
 
 def read_args():
+    """Configure the ArgumentParser.
+    """
     def _is_dir(path, parser):
         """Ensure a given directory exists."""
         if path is None or not os.path.isdir(path):
@@ -316,6 +317,12 @@ def get_log_level(args):
 
 
 def configure_logging(study, log_level):
+    """Configure the logging for this run.
+
+    Args:
+        study (:obj:`str`): The name of the study being exported.
+        log_level (:obj:`str`): The log level to use.
+    """
     ch = logging.StreamHandler(sys.stdout)
 
     log_level = getattr(logging, log_level)
@@ -332,9 +339,23 @@ def configure_logging(study, log_level):
     logging.getLogger('datman.dashboard').addHandler(ch)
     logging.getLogger('datman.xnat').addHandler(ch)
     logging.getLogger('datman.exporters').addHandler(ch)
+    logging.getLogger('datman.importers').addHandler(ch)
 
 
 def get_sessions(config, args):
+    """Get all scan sessions to be exported.
+
+    Args:
+        config (:obj:`datman.config.config`): The datman configuration.
+        args (:obj:`argparse.ArgumentParser`): The argument parser for the
+            user's input arguments.
+
+    Returns:
+        list[(None|datman.xnat.XNAT, datman.importers.SessionImporter)]:
+            a list of tuples containing the XNAT connection to use (if needed
+            during export) and a SessionImporter. If no sessions are found,
+            will return an empty list.
+    """
     if args.use_zips != "USE_XNAT":
         return collect_zips(config, args)
 
@@ -348,6 +369,19 @@ def get_sessions(config, args):
 
 
 def collect_zips(config, args):
+    """Locate all usable zip files.
+
+    Args:
+        config (:obj:`datman.config.config`): The datman configuration.
+        args (:obj:`argparse.ArgumentParser`): The argument parser for the
+            user's command line inputs.
+
+    Returns:
+        list[(None, datman.importers.ZipImporter)]: A list of tuples each
+            containing None (for compatibility with exporting XNATExperiments)
+            and a ZipImporter. Will return an empty list if no zip files are
+            found.
+    """
     if args.use_zips is None:
         zip_folder = config.get_path("dicom")
     else:
@@ -386,6 +420,24 @@ def collect_zips(config, args):
 
 
 def collect_experiment(config, experiment_id, study, url=None, auth=None):
+    """Get a single XNAT experiment.
+
+    Args:
+        config (:obj:`datman.config.config`): A datman configuration object.
+        experiment_id (:obj:`str`): An XNAT experiment ID.
+        study (:obj:`str`): A valid study ID.
+        url (:obj:`str`, optional): The XNAT url to use. If not given, it
+            will be retrieved from the configuration files.
+        auth (:obj:`tuple`, optional): A tuple containing the username and
+            password to use when accessing the XNAT server. If not given,
+            the XNAT_USER and XNAT_PASS environment variables will be used.
+
+    Returns:
+        list[(datman.xnat.XNAT, datman.importers.XNATExperiment)]:
+            a list with a single tuple containing the xnat connection to use
+            and the experiment importer. If not found, an empty list will be
+            given.
+    """
     ident = get_identifier(config, experiment_id)
     xnat = datman.xnat.get_connection(
         config, site=ident.site, url=url, auth=auth)
@@ -407,6 +459,17 @@ def collect_experiment(config, experiment_id, study, url=None, auth=None):
 
 
 def get_identifier(config, subid):
+    """Get a valid identifier for a given ID.
+
+    Args:
+        config (:obj:`datman.config.config`): A datman configuration object
+            for a study.
+        subid (:obj:`str`): A valid identifier in one of datman's accepted name
+            conventions.
+
+    Returns:
+        datman.scanid.Identifier: A datman Identifier for the given subid.
+    """
     ident = validate_subject_id(subid, config)
 
     try:
@@ -425,6 +488,20 @@ def get_identifier(config, subid):
 
 
 def collect_all_experiments(config, auth=None, url=None):
+    """Retrieve all XNAT experiment objects for a single study.
+
+    Args:
+        config (:obj:`datman.config.config`): A datman configuration object
+            for the current study.
+        auth (:obj:`tuple`, optional): A tuple containing an XNAT username and
+            password. If not provided, the XNAT_USER and XNAT_PASS variables
+            will be used. Defaults to None.
+        url (:obj:`str`, optional): The URL for the XNAT server.
+
+    Returns:
+        list[datman.importers.XNATExperiment]: A list of XNATExperiment
+            importers for all experiments belonging to the config's study.
+    """
     experiments = []
     server_cache = {}
 
@@ -446,21 +523,31 @@ def collect_all_experiments(config, auth=None, url=None):
 
 
 def get_experiment_identifier(config, project, experiment_id):
+    """Get a valid datman identifier for an experiment found on XNAT.
+
+    Args:
+        config (:obj:`datman.config.config`): A datman configuration object.
+        project (:obj:`str`): The name of a project on XNAT.
+        experiment_id (:obj:`str`): The name of an experiment found on XNAT.
+
+    Returns:
+        :obj:`datman.scanid.Identifier` or None if the ID is invalid or non-MR.
+    """
     try:
         ident = validate_subject_id(experiment_id, config)
     except datman.scanid.ParseException:
         logger.error(f"Invalid XNAT experiment ID {experiment_id} in project "
                      f"{project}. Please update XNAT with correct ID.")
-        return
+        return None
 
     if ident.session is None and not datman.scanid.is_phantom(ident):
         logger.error(f"Invalid experiment ID {experiment_id} in project "
                      f"{project}. Reason - Not a phantom, but missing session "
                      "number")
-        return
+        return None
 
     if ident.modality != "MR":
-        return
+        return None
 
     return ident
 
@@ -488,6 +575,18 @@ def get_projects(config):
 
 
 def get_xnat_experiment(xnat, project, ident):
+    """Retrieve information about an XNAT experiment.
+
+    Args:
+        xnat (:obj:`datman.xnat.XNAT`): A connection to an XNAT server.
+        project (:obj:`str`): The name of the XNAT project the experiment
+            belongs to.
+        ident (:obj:`datman.scanid.Identifier`): A datman identifier for the
+            experiment.
+
+    Returns:
+        :obj:`datman.importers.XNATExperiment` or None if not found.
+    """
     experiment_label = ident.get_xnat_experiment_id()
 
     logger.info(f"Retrieving experiment: {experiment_label}")
@@ -499,11 +598,22 @@ def get_xnat_experiment(xnat, project, ident):
     except Exception as e:
         logger.error(f"Unable to retrieve experiment {experiment_label} from "
                      f"XNAT server. {type(e).__name__}: {e}")
-        return
+        return None
     return xnat_experiment
 
 
 def export_resources(resource_dir, xnat, importer, dry_run=False):
+    """Export all resource (non-dicom) files for a scan session.
+
+    Args:
+        resource_dir (:obj:`str`): The absolute path to where resources
+            should be exported.
+        xnat (:obj:`datman.xnat.XNAT`): A connection to an XNAT server.
+        importer (:obj:`datman.importers.SessionImporter`): An importer for
+            the scan session to export resources for.
+        dry_run (bool, optional): Report changes that would be made without
+            modifying anything.  Defaults to False.
+    """
     logger.info(f"Extracting {len(importer.resource_files)} resources "
                 f"from {importer.name}")
 
@@ -523,7 +633,7 @@ def export_resources(resource_dir, xnat, importer, dry_run=False):
 
     xnat_experiment = importer
 
-    for label in xnat_experiment.resource_IDs:
+    for label in xnat_experiment.resource_ids:
         if label == "No Label":
             target_path = os.path.join(resource_dir, "MISC")
         else:
@@ -535,7 +645,7 @@ def export_resources(resource_dir, xnat, importer, dry_run=False):
             logger.error(f"Failed creating target folder: {target_path}")
             continue
 
-        xnat_resource_id = xnat_experiment.resource_IDs[label]
+        xnat_resource_id = xnat_experiment.resource_ids[label]
 
         try:
             resources = xnat.get_resource_list(xnat_experiment.project,
@@ -575,7 +685,7 @@ def download_resource(xnat, xnat_experiment, xnat_resource_id,
     if dry_run:
         logger.info(f"DRY RUN: Skipping download of {xnat_resource_uri} to "
                     f"{target_path}")
-        return
+        return None
 
     try:
         source = xnat.get_resource(xnat_experiment.project,
@@ -587,7 +697,7 @@ def download_resource(xnat, xnat_experiment, xnat_resource_id,
     except Exception as e:
         logger.error("Failed downloading resource archive from "
                      f"{xnat_experiment.name} with reason: {e}")
-        return
+        return None
 
     # check that the target path exists
     target_dir = os.path.split(target_path)[0]
@@ -596,7 +706,7 @@ def download_resource(xnat, xnat_experiment, xnat_resource_id,
             os.makedirs(target_dir)
         except OSError:
             logger.error(f"Failed to create directory: {target_dir}")
-            return
+            return None
 
     # copy the downloaded file to the target location
     try:
@@ -700,6 +810,7 @@ def make_session_exporters(config, session, experiment, bids_opts=None,
 
     exporters = []
     for exp_format in formats:
+        # pylint: disable-next=invalid-name
         Exporter = datman.exporters.get_exporter(exp_format, scope="session")
         exporters.append(
             Exporter(config, session, experiment, bids_opts=bids_opts,
@@ -771,6 +882,8 @@ def make_all_series_exporters(config, session, experiment, bids_opts=None,
 
 
 def get_tag_settings(config, site):
+    """Get configuration for all tags defined for a specific site.
+    """
     try:
         tags = config.get_tags(site=site)
     except datman.exceptions.UndefinedSetting:
@@ -819,6 +932,7 @@ def make_series_exporters(session, scan, tag_config, config, wanted_tags=None,
 
         logger.debug(f"Found export formats {formats} for {scan}")
         for exp_format in formats:
+            # pylint: disable-next=invalid-name
             Exporter = datman.exporters.get_exporter(
                 exp_format, scope="series")
 
@@ -855,14 +969,14 @@ def is_blacklisted(scan_name, config):
 def needs_raw(session_exporters):
     """Returns true if raw data is needed to run any session exporters.
     """
-    return any([exp.needs_raw_data() for exp in session_exporters])
+    return any(exp.needs_raw_data() for exp in session_exporters)
 
 
 def needs_export(session_exporters):
     """Returns True if any session exporters need to be run.
     """
     try:
-        return any([not exp.outputs_exist() for exp in session_exporters])
+        return any(not exp.outputs_exist() for exp in session_exporters)
     except ValueError:
         # ValueError is raised when an invalid series number exists on XNAT.
         # Skip these sessions
diff --git a/bin/dm_xnat_upload.py b/bin/dm_xnat_upload.py
index c9eef59b..ef3c6686 100755
--- a/bin/dm_xnat_upload.py
+++ b/bin/dm_xnat_upload.py
@@ -277,8 +277,9 @@ def resource_data_exists(xnat_resources, archive):
                                if zf.read(item)]
     empty_files = list(set(local_resources) - set(local_resources_mod))
     if empty_files:
-        logger.warning("Cannot upload empty resource files {}, omitting."
-                    "".format(", ".join(empty_files)))
+        logger.warning(
+            f"Cannot upload empty resource files {', '.join(empty_files)}, "
+            "omitting.")
     # paths in xnat are url encoded. Need to fix local paths to match
     local_resources_mod = [urllib.request.pathname2url(p)
                            for p in local_resources_mod]
@@ -299,7 +300,7 @@ def scan_data_exists(xnat_experiment, local_headers):
     if xnat_experiment.uid not in local_experiment_ids:
         raise ValueError("Experiment UID doesnt match XNAT")
 
-    if not set(local_scan_uids).issubset(set(xnat_experiment.scan_UIDs)):
+    if not set(local_scan_uids).issubset(set(xnat_experiment.scan_uids)):
         logger.info("Found UIDs for {} not yet added to xnat".format(
             xnat_experiment.name))
         return False
diff --git a/bin/xnat_fetch_sessions.py b/bin/xnat_fetch_sessions.py
index 6a4a8acb..cc7a8263 100755
--- a/bin/xnat_fetch_sessions.py
+++ b/bin/xnat_fetch_sessions.py
@@ -85,7 +85,7 @@ def main():
         logger.setLevel(logging.ERROR)
 
     if not study:
-        with datman.xnat.xnat(xnat_server, username, password) as xnat:
+        with datman.xnat.XNAT(xnat_server, username, password) as xnat:
             download_subjects(xnat, xnat_project, destination)
         return
 
@@ -104,7 +104,7 @@ def main():
             logger.error("{}".format(e))
             continue
         username, password = get_credentials(credentials_file)
-        with datman.xnat.xnat(server, username, password) as xnat:
+        with datman.xnat.XNAT(server, username, password) as xnat:
             download_subjects(xnat, project, destination)
 
 
@@ -189,7 +189,7 @@ def update_needed(zip_file, experiment, xnat):
     xnat_resources = experiment.get_resource_uris(xnat)
 
     if not files_downloaded(zip_resources, xnat_resources) or \
-       not files_downloaded(zip_scan_uids, experiment.scan_UIDs):
+       not files_downloaded(zip_scan_uids, experiment.scan_uids):
         logger.error("Some of XNAT contents for {} is missing from file "
                      "system. Zip file will be deleted and recreated"
                      "".format(experiment.name))
diff --git a/datman/importers.py b/datman/importers.py
index 33049ea9..c6d0bb0f 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -4,6 +4,7 @@
 uses these classes to create a uniform interface for its exporters, which
 create the files and database contents users may actually interact with.
 """
+
 from abc import ABC, abstractmethod
 from datetime import datetime
 import glob
@@ -12,9 +13,9 @@
 import os
 import re
 import shutil
-from zipfile import ZipFile
+from zipfile import ZipFile, BadZipFile
 
-from datman.exceptions import ParseException, XnatException, InputException
+from datman.exceptions import ParseException, XnatException
 from datman.utils import is_dicom, get_archive_headers
 
 
@@ -22,35 +23,20 @@
 
 
 class SessionImporter(ABC):
-
-    # Exporters currently use these from XNATExperiment:
-    # experiment.name
-    # experiment.source_name (related to sharing data)
-    # experiment.scans
-    # experiment.date
-    # experiment.is_shared()
-
-    # Missed but possibly needed attributes (from extract):
-    #   experiment.assign_scan_names(config, ident)
-    #
-    # Maybe we really just need a resource exporter class...
-    #   experiment.resource_files (list of dicts)
-    #   experiment.resource_IDs (dict of folder names to numerical IDs)
-    #           e.g. {'behav': '297528', 'misc': '305312'}
+    """An interface for importing a whole scan session into datman.
+    """
 
     @property
     @abstractmethod
     def ident(self) -> 'datman.scanid.Identifier':
         """A datman identifier for the session.
         """
-        pass
 
     @property
     @abstractmethod
     def name(self) -> str:
         """A valid ID for the scan session being imported.
         """
-        pass
 
     @property
     @abstractmethod
@@ -62,28 +48,24 @@ def source_name(self) -> str:
         corresponds to it's original ID. This will be equal to 'name' when
         the session is not shared or sharing is not being tracked.
         """
-        pass
 
     @property
     @abstractmethod
     def date(self) -> str:
         """A string representation (YYYY-MM-DD) of the scan collection date.
         """
-        pass
 
     @property
     @abstractmethod
     def scans(self) -> list['SeriesImporter']:
         """A list of scan SeriesImporters that belong to the session.
         """
-        pass
 
     @property
     @abstractmethod
     def resource_files(self) -> list[str]:
         """A list of relative paths for any resource (non-dcm) files.
         """
-        pass
 
     @property
     @abstractmethod
@@ -92,19 +74,16 @@ def dcm_subdir(self) -> str:
 
         This will be a relative path, and will always be defined.
         """
-        pass
 
     @abstractmethod
     def is_shared(self) -> bool:
         """Indicates whether the session is shared with other projects.
         """
-        pass
 
     @abstractmethod
-    def get_files(self, dest_dir, *args, **kwargs):
+    def get_files(self, dest_dir: str, *args, **kwargs):
         """Retrieve all of the session's dcm files and place them in dest_dir.
         """
-        pass
 
     def assign_scan_names(self, config, ident):
         """Assign a datman style name to each scan in this experiment.
@@ -128,7 +107,7 @@ def assign_scan_names(self, config, ident):
         for scan in self.scans:
             try:
                 scan.set_datman_name(str(ident), tags)
-            except Exception as e:
+            except (ParseException, TypeError, KeyError) as e:
                 logger.info(
                     f"Failed to make file name for series {scan.series} "
                     f"in session {str(ident)}. Reason {type(e).__name__}: "
@@ -136,16 +115,9 @@ def assign_scan_names(self, config, ident):
 
 
 class SeriesImporter(ABC):
-    # XNATScan attributes and methods used by exporters...
-    # .series
-    # .subject (FakeSideCar needs)
-    # .names
-    # .description
-
-    # MISSED (may have missed more in dm_xnat_extract):
-    #   scan.download_dir
-    #       xnat copy for example points to: /scratch/dawn/temp_stuff/export_zip/xnat_copy/SPN10_CMH_0083_01_SE01_MR/scans/6-t1_mprage_T1_900/resources/DICOM/files
-    #       unzipped copy would be (diff session): 20190116_Ex09352_ASND1MR_ASQB002/Ex09352_Se00003_SagT1Bravo-1mm-32ch/
+    """An interface for importing a single dcm series into datman.
+    """
+
     @property
     @abstractmethod
     def dcm_dir(self) -> str:
@@ -154,7 +126,6 @@ def dcm_dir(self) -> str:
         This should be None if the dicoms have not been retrieved from their
         source location (e.g. with get_files).
         """
-        pass
 
     @property
     @abstractmethod
@@ -164,7 +135,6 @@ def series(self) -> str:
         This should be a string because sometimes the 'number' comes with
         non-numeric prefixes or postfixes (e.g. on XNAT in some circumstances).
         """
-        pass
 
     @property
     @abstractmethod
@@ -175,57 +145,51 @@ def subject(self) -> str:
         truncated or extended version of it as subject may be to experiment
         on XNAT).
         """
-        pass
 
     @property
     @abstractmethod
     def description(self) -> str:
         """The series description (as from the dicom headers).
         """
-        pass
 
     @property
     @abstractmethod
     def names(self) -> list[str]:
         """A list of valid scan names that may be applied to this series.
         """
-        pass
 
     @property
     @abstractmethod
     def image_type(self) -> str:
         """The ImageType from the dicom headers.
         """
-        pass
 
     @property
     @abstractmethod
     def uid(self) -> str:
         """The UID from the dicom headers.
         """
-        pass
-
 
     @abstractmethod
     def is_usable(self) -> bool:
         """Indicates whether the series contains usable dcm files.
         """
-        pass
 
     @abstractmethod
     def get_files(self, dest_dir, *args, **kwargs):
         """Retrieve dcm files for this series and store them in dest_dir.
         """
-        pass
 
     @abstractmethod
-    def set_datman_name(self, ident: str, tags: 'datman.config.TagInfo'
-        ) -> list[str]:
-        pass
+    def set_datman_name(self, base_name: str, tags: 'datman.config.TagInfo'
+                        ) -> list[str]:
+        """Construct a datman-style name for the scan.
+        """
 
     @abstractmethod
     def set_tag(self, tag_map):
-        pass
+        """Set the scan tag for the scan.
+        """
 
     def _mangle_descr(self) -> str:
         """Modify a series description to remove non-alphanumeric characters.
@@ -234,7 +198,9 @@ def _mangle_descr(self) -> str:
             return ""
         return re.sub(r"[^a-zA-Z0-9.+]+", "-", self.description)
 
-    def is_derived(self):
+    def is_derived(self) -> bool:
+        """Check if the scan is derived or primary.
+        """
         if not self.image_type:
             logger.warning(
                 f"Image type could not be found for series {self.series}. "
@@ -245,24 +211,42 @@ def is_derived(self):
         return False
 
 
-###############################################################################
-#### XNAT classes, formerly in xnat.py
+class XNATObject(ABC):
+    """An abstract base class for classes that manage XNAT contents.
+    """
 
+    @property
+    @abstractmethod
+    def raw_json(self) -> dict:
+        """The json for the XNAT entity.
+        """
 
-class XNATObject(ABC):
-    def _get_field(self, key):
+    def get_field(self, key):
+        """Get an item from an XNAT object's data fields.
+        """
         if not self.raw_json.get("data_fields"):
             return ""
         return self.raw_json["data_fields"].get(key, "")
 
 
 class XNATSubject(XNATObject):
+    """An XNAT subject, which may hold one or more experiments.
+    """
+
     def __init__(self, subject_json):
         self.raw_json = subject_json
-        self.name = self._get_field("label")
-        self.project = self._get_field("project")
+        self.name = self.get_field("label")
+        self.project = self.get_field("project")
         self.experiments = self._get_experiments()
 
+    @property
+    def raw_json(self) -> dict:
+        return self._json
+
+    @raw_json.setter
+    def raw_json(self, value):
+        self._json = value
+
     def _get_experiments(self):
         experiments = [
             exp for exp in self.raw_json["children"]
@@ -288,22 +272,25 @@ def __repr__(self):
 
 
 class XNATExperiment(SessionImporter, XNATObject):
+    """An XNAT experiment which may hold scan data and resource files.
+    """
+
     def __init__(self, project, subject_name, experiment_json,
                  ident=None):
         self.raw_json = experiment_json
         self.project = project
         self.subject = subject_name
-        self.uid = self._get_field("UID")
-        self.id = self._get_field("ID")
-        self.date = self._get_field("date")
+        self.uid = self.get_field("UID")
+        self.id = self.get_field("ID")
+        self.date = self.get_field("date")
         self.ident = ident
 
         if self.is_shared():
             self.name = [label for label in self.get_alt_labels()
                          if self.subject in label][0]
-            self.source_name = self._get_field("label")
+            self.source_name = self.get_field("label")
         else:
-            self.name = self._get_field("label")
+            self.name = self.get_field("label")
             self.source_name = self.name
 
         # The subdirectory to find the dicoms in after download
@@ -311,15 +298,23 @@ def __init__(self, project, subject_name, experiment_json,
 
         # Scan attributes
         self.scans = self._get_scans()
-        self.scan_UIDs = self._get_scan_UIDs()
-        self.scan_resource_IDs = self._get_scan_rIDs()
+        self.scan_uids = self._get_scan_uids()
+        self.scan_resource_ids = self._get_scan_rids()
 
         # Resource attributes
         self.resource_files = self._get_contents("resources/resource")
-        self.resource_IDs = self._get_resource_IDs()
+        self.resource_ids = self._get_resource_ids()
 
         # Misc - basically just OPT CU1 needs this
-        self.misc_resource_IDs = self._get_other_resource_IDs()
+        self.misc_resource_ids = self._get_other_resource_ids()
+
+    @property
+    def raw_json(self) -> dict:
+        return self._json
+
+    @raw_json.setter
+    def raw_json(self, value):
+        self._json = value
 
     @property
     def ident(self) -> 'datman.scanid.Identifier':
@@ -361,6 +356,14 @@ def date(self) -> str:
     def date(self, value: str):
         self._date = value
 
+    @property
+    def resource_files(self) -> list[str]:
+        return self._resource_files
+
+    @resource_files.setter
+    def resource_files(self, value):
+        self._resource_files = value
+
     @property
     def dcm_subdir(self) -> str:
         return self._dcm_subdir
@@ -387,10 +390,10 @@ def _get_scans(self):
             xnat_scans.append(XNATScan(self, scan_json))
         return xnat_scans
 
-    def _get_scan_UIDs(self):
+    def _get_scan_uids(self):
         return [scan.uid for scan in self.scans]
 
-    def _get_scan_rIDs(self):
+    def _get_scan_rids(self):
         # These can be used to download a series from xnat
         resource_ids = []
         for scan in self.scans:
@@ -408,7 +411,7 @@ def _get_scan_rIDs(self):
                     resource_ids.append(str(r_id))
         return resource_ids
 
-    def _get_resource_IDs(self):
+    def _get_resource_ids(self):
         if not self.resource_files:
             return {}
 
@@ -419,7 +422,7 @@ def _get_resource_IDs(self):
                 resource["data_fields"]["xnat_abstractresource_id"])
         return resource_ids
 
-    def _get_other_resource_IDs(self):
+    def _get_other_resource_ids(self):
         """
         OPT's CU site uploads niftis to their server. These niftis are neither
         classified as resources nor as scans so our code misses them entirely.
@@ -485,7 +488,7 @@ def get_autorun_ids(self, xnat):
             XnatException: If no AutoRun.xml pipeline instance is found or
                 the API response can't be parsed.
         """
-        query_xml = """
+        query_xml = f"""
             <xdat:bundle
                     xmlns:xdat="http://nrg.wustl.edu/security"
                     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
@@ -513,12 +516,12 @@ def get_autorun_ids(self, xnat):
                     <xdat:criteria override_value_formatting="0">
                         <xdat:schema_field>wrk:workflowData/ID</xdat:schema_field>
                         <xdat:comparison_type>LIKE</xdat:comparison_type>
-                        <xdat:value>{exp_id}</xdat:value>
+                        <xdat:value>{self.id}</xdat:value>
                     </xdat:criteria>
                     <xdat:criteria override_value_formatting="0">
                         <xdat:schema_field>wrk:workflowData/ExternalID</xdat:schema_field>
                         <xdat:comparison_type>=</xdat:comparison_type>
-                        <xdat:value>{project}</xdat:value>
+                        <xdat:value>{self.project}</xdat:value>
                     </xdat:criteria>
                     <xdat:criteria override_value_formatting="0">
                         <xdat:schema_field>wrk:workflowData/pipeline_name</xdat:schema_field>
@@ -527,18 +530,18 @@ def get_autorun_ids(self, xnat):
                     </xdat:criteria>
                 </xdat:search_where>
             </xdat:bundle>
-        """.format(exp_id=self.id, project=self.project)  # noqa: E501
+        """  # noqa: E501
 
         query_url = f"{xnat.server}/data/search?format=json"
-        response = xnat._make_xnat_post(query_url, data=query_xml)
+        response = xnat.make_xnat_post(query_url, data=query_xml)
 
         if not response:
             raise XnatException("AutoRun.xml pipeline not found.")
 
         try:
             found_pipelines = json.loads(response)
-        except json.JSONDecodeError:
-            raise XnatException("Can't decode workflow query response.")
+        except json.JSONDecodeError as e:
+            raise XnatException("Can't decode workflow query response.") from e
 
         try:
             results = found_pipelines["ResultSet"]["Result"]
@@ -554,31 +557,32 @@ def get_resource_uris(self, xnat_connection):
         Returns a list of all resource URIs from this session.
         """
         resources = []
-        resource_ids = list(self.resource_IDs.values())
-        resource_ids.extend(self.misc_resource_IDs)
+        resource_ids = list(self.resource_ids.values())
+        resource_ids.extend(self.misc_resource_ids)
         for r_id in resource_ids:
             resource_list = xnat_connection.get_resource_list(
                 self.project, self.subject, self.name, r_id)
             resources.extend([item["URI"] for item in resource_list])
         return resources
 
-    def get_files(self, dest_folder, xnat, *args, zip_name=None, **kwargs):
+    # pylint: disable-next=arguments-differ
+    def get_files(self, dest_dir, xnat, *args, zip_name=None, **kwargs):
         """
         Download a zip file containing all data for this session. Returns the
         path to the new file if download is successful, raises an exception if
         not
 
         Args:
-            dest_folder: The absolute path to the folder where the zip
+            dest_dir: The absolute path to the folder where the zip
                 should be deposited
-            xnat: An instance of datman.xnat.xnat()
+            xnat: An instance of datman.xnat.XNAT()
             zip_name: An optional name for the output zip file. If not
                 set the zip name will be session.name
 
         """
-        resources_list = list(self.scan_resource_IDs)
-        resources_list.extend(self.misc_resource_IDs)
-        resources_list.extend(self.resource_IDs)
+        resources_list = list(self.scan_resource_ids)
+        resources_list.extend(self.misc_resource_ids)
+        resources_list.extend(self.resource_ids)
 
         if not resources_list:
             raise ValueError(f"No scans or resources found for {self.name}")
@@ -590,13 +594,13 @@ def get_files(self, dest_folder, xnat, *args, zip_name=None, **kwargs):
         if not zip_name:
             zip_name = self.name.upper() + ".zip"
 
-        output_path = os.path.join(dest_folder, zip_name)
+        output_path = os.path.join(dest_dir, zip_name)
         if os.path.exists(output_path):
             logger.error(
                 f"Cannot download {output_path}, file already exists.")
             return output_path
 
-        xnat._get_xnat_stream(url, output_path)
+        xnat.get_xnat_stream(url, output_path)
 
         return output_path
 
@@ -607,7 +611,7 @@ def is_shared(self) -> bool:
         if not alt_names:
             return False
 
-        return any([self.subject in label for label in alt_names])
+        return any(self.subject in label for label in alt_names)
 
     def get_alt_labels(self):
         """Find the names for all shared copies of the XNAT experiment.
@@ -625,27 +629,38 @@ def __repr__(self):
 
 
 class XNATScan(SeriesImporter, XNATObject):
+    """A single XNAT series.
+    """
+
     def __init__(self, experiment, scan_json):
+        self.raw_json = scan_json
         self.project = experiment.project
         self.subject = experiment.subject
         self.experiment = experiment.name
         self.shared = experiment.is_shared()
         self.source_experiment = experiment.source_name
         self.raw_json = scan_json
-        self.uid = self._get_field("UID")
-        self.series = self._get_field("ID")
-        self.image_type = self._get_field("parameters/imageType")
+        self.uid = self.get_field("UID")
+        self.series = self.get_field("ID")
+        self.image_type = self.get_field("parameters/imageType")
         self.multiecho = self.is_multiecho()
         self.description = self._set_description()
-        self.type = self._get_field("type")
+        self.type = self.get_field("type")
         self.names = []
+        self.echo_dict = {}  # Will remain empty unless scan is multi-echo
         self.tags = []
         self.dcm_dir = None
 
-    # Use properties here to conform with SeriesImporter interface
-    # and guarantee at creation that expected attributes exist
     @property
-    def dcm_dir(self) ->str:
+    def raw_json(self) -> dict:
+        return self._json
+
+    @raw_json.setter
+    def raw_json(self, value):
+        self._json = value
+
+    @property
+    def dcm_dir(self) -> str:
         return self._dcm_dir
 
     @dcm_dir.setter
@@ -684,6 +699,14 @@ def names(self) -> list[str]:
     def names(self, value: list[str]):
         self._names = value
 
+    @property
+    def image_type(self) -> str:
+        return self._image_type
+
+    @image_type.setter
+    def image_type(self, value):
+        self._image_type = value
+
     @property
     def uid(self) -> list[str]:
         return self._uid
@@ -693,12 +716,14 @@ def uid(self, value: list[str]):
         self._uid = value
 
     def _set_description(self):
-        series_descr = self._get_field("series_description")
+        series_descr = self.get_field("series_description")
         if series_descr:
             return series_descr
-        return self._get_field("type")
+        return self.get_field("type")
 
     def is_multiecho(self):
+        """Check if the series is multiecho.
+        """
         try:
             child = self.raw_json["children"][0]["items"][0]
         except (KeyError, IndexError):
@@ -709,6 +734,8 @@ def is_multiecho(self):
         return False
 
     def raw_dicoms_exist(self):
+        """Check if any dicom files exist for the scan.
+        """
         for child in self.raw_json["children"]:
             for item in child["items"]:
                 file_type = item["data_fields"].get("content")
@@ -747,7 +774,8 @@ def _set_fmap_tag(self, tag_map, matches):
                 if tag in matches:
                     if not re.search(pattern["ImageType"], self.image_type):
                         del matches[tag]
-        except Exception:
+        except (re.error, TypeError) as e:
+            logger.error(f"Error applying FMAP tags: {e}. Ignoring tag.")
             matches = {}
 
         if len(matches) > 2 or (len(matches) == 2 and not self.multiecho):
@@ -759,15 +787,17 @@ def set_datman_name(self, base_name, tags):
         mangled_descr = self._mangle_descr()
         padded_series = self.series.zfill(2)
         tag_settings = self.set_tag(tags.series_map)
+
         if not tag_settings:
             raise ParseException(
                 f"Can't identify tag for series {self.series}")
+
         names = []
         self.echo_dict = {}
-        for tag in tag_settings:
+        for tag, settings in tag_settings.items():
             name = "_".join([base_name, tag, padded_series, mangled_descr])
             if self.multiecho:
-                echo_num = tag_settings[tag]["EchoNumber"]
+                echo_num = settings["EchoNumber"]
                 if echo_num not in self.echo_dict:
                     self.echo_dict[echo_num] = name
             names.append(name)
@@ -806,14 +836,15 @@ def is_usable(self, strict=False):
 
         return True
 
-    def get_files(self, output_dir, xnat_conn, *args, **kwargs):
+    # pylint: disable-next=arguments-differ
+    def get_files(self, dest_dir, xnat_conn, *args, **kwargs):
         """Download all dicoms for this series.
 
         This will download all files in the series, and if successful,
         set the dcm_dir attribute to the destination folder.
 
         Args:
-            output_dir (:obj:`str`): The full path to the location to
+            dest_dir (:obj:`str`): The full path to the location to
                 download all files to.
             xnat_conn (:obj:`datman.xnat.xnat`): An open xnat connection
                 to the server to download from.
@@ -832,7 +863,7 @@ def get_files(self, output_dir, xnat_conn, *args, **kwargs):
         try:
             dicom_zip = xnat_conn.get_dicom(self.project, self.subject,
                                             self.experiment, self.series)
-        except Exception as e:
+        except XnatException as e:
             logger.error(f"Failed to download dicom archive for {self.subject}"
                          f" series {self.series}. Reason - {e}")
             return False
@@ -849,21 +880,21 @@ def get_files(self, output_dir, xnat_conn, *args, **kwargs):
 
         try:
             with ZipFile(dicom_zip, "r") as fh:
-                fh.extractall(output_dir)
-        except Exception as e:
+                fh.extractall(dest_dir)
+        except (BadZipFile, PermissionError) as e:
             logger.error("An error occurred unpacking dicom archive for "
                          f"{self.experiment}'s series {self.series}' - {e}")
             os.remove(dicom_zip)
             return False
-        else:
-            logger.info("Unpacking complete. Deleting archive file "
-                        f"{dicom_zip}")
-            os.remove(dicom_zip)
+
+        logger.info("Unpacking complete. Deleting archive file "
+                    f"{dicom_zip}")
+        os.remove(dicom_zip)
 
         if self.shared:
-            self._fix_download_name(output_dir)
+            self._fix_download_name(dest_dir)
 
-        dicom_file = self._find_first_dicom(output_dir)
+        dicom_file = self._find_first_dicom(dest_dir)
 
         try:
             self.dcm_dir = os.path.dirname(dicom_file)
@@ -884,11 +915,12 @@ def _find_first_dicom(self, dcm_dir):
                 exist in the folder.
         """
         search_dir = self._find_series_dir(dcm_dir)
-        for root_dir, folder, files in os.walk(search_dir):
+        for root_dir, _, files in os.walk(search_dir):
             for item in files:
                 path = os.path.join(root_dir, item)
                 if is_dicom(path):
                     return path
+        return None
 
     def _find_series_dir(self, search_dir):
         """Find the directory a series was downloaded to, if any.
@@ -943,11 +975,9 @@ def __repr__(self):
         return self.__str__()
 
 
-#############################################################################
-# Zip file classes
-
-
 class ZipImporter(SessionImporter):
+    """A zip file to be managed by datman.
+    """
 
     def __init__(self, ident, zip_path):
         self.ident = ident
@@ -1026,28 +1056,28 @@ def is_shared(self) -> bool:
         # Can't track shared sessions with zip files.
         return False
 
-    def get_files(self, dest_path: str, *args, **kwargs):
+    def get_files(self, dest_dir: str, *args, **kwargs):
         """Unpack the zip file at the given location.
 
         Args:
-            dest_path (str): The full path to the location to extract into.
+            dest_dir (str): The full path to the location to extract into.
         """
         for item in self.scans:
-            item.get_files(dest_path)
-        self.extract_resources(dest_path)
+            item.get_files(dest_dir)
+        self.get_resources(dest_dir)
 
-    def get_resources(self, dest_path: str, fname: str = None):
+    def get_resources(self, dest_dir: str, fname: str = None):
         """Unpack resource (non-dicom) files at the given location.
 
         Args:
-            dest_path (str): The full path to the location to extract into.
+            dest_dir (str): The full path to the location to extract into.
         """
         with ZipFile(self.path, "r") as fh:
             if fname:
-                fh.extract(fname, path=dest_path)
+                fh.extract(fname, path=dest_dir)
                 return
-            for item in self.resources_files:
-                fh.extract(item, path=dest_path)
+            for item in self.resource_files:
+                fh.extract(item, path=dest_dir)
 
     def parse_contents(self) -> dict:
         """Read and organize the contents of the zip file.
@@ -1076,11 +1106,11 @@ def get_scans(self) -> list['ZipSeriesImporter']:
         headers = get_archive_headers(self.path)
         scans = {}
         duplicate_series = set()
-        for sub_path in headers:
+        for sub_path, header in headers.items():
             # .get_full_subjectid may need to be changed for compatibility
             zip_scan = ZipSeriesImporter(
                     self.ident.get_full_subjectid(), self.path, sub_path,
-                    headers[sub_path], self.contents['scans'][sub_path]
+                    header, self.contents['scans'][sub_path]
             )
             if zip_scan.series in scans:
                 duplicate_series.add(zip_scan.series)
@@ -1106,7 +1136,10 @@ def __repr__(self):
 
 
 class ZipSeriesImporter(SeriesImporter):
+    """A single scan series from a zip file to be managed by datman.
+    """
 
+    # pylint: disable-next=too-many-arguments,too-many-positional-arguments
     def __init__(self, subject, zip_file, series_dir, header, zip_items):
         self.subject = subject
         self.zip_file = zip_file
@@ -1119,10 +1152,9 @@ def __init__(self, subject, zip_file, series_dir, header, zip_items):
         self.uid = str(header.get('StudyInstanceUID'))
         self.image_type = header.get("ImageType")
         self.names = []
+        self.tags = []
         self.dcm_dir = None
 
-    # Use properties here to conform with SeriesImporter interface
-    # and guarantee at creation that expected attributes exist
     @property
     def dcm_dir(self) -> str:
         return self._dcm_dir
@@ -1184,16 +1216,16 @@ def uid(self, value: list[str]):
         self._uid = value
 
     def is_usable(self):
-        return any([item.endswith(".dcm") for item in self.contents])
+        return any(item.endswith(".dcm") for item in self.contents)
 
-    def get_files(self, output_dir: str, *args, **kwargs):
+    def get_files(self, dest_dir: str, *args, **kwargs):
         with ZipFile(self.zip_file, "r") as fh:
             for item in self.contents:
-                fh.extract(item, path=output_dir)
-        self.dcm_dir = os.path.join(output_dir, self.series_dir)
+                fh.extract(item, path=dest_dir)
+        self.dcm_dir = os.path.join(dest_dir, self.series_dir)
 
     def set_datman_name(self, base_name: str, tags: 'datman.config.TagInfo'
-            ) -> list[str]:
+                        ) -> list[str]:
         mangled_descr = self._mangle_descr()
         tag_settings = self.set_tag(tags.series_map)
         if not tag_settings:
@@ -1224,10 +1256,12 @@ def set_tag(self, tag_map):
                 matches[tag] = pattern
 
         if (len(matches) == 1 or
-                all(['EchoNumber' in matches[tag] for tag in matches])):
+                all('EchoNumber' in conf for conf in matches.values())):
             self.tags = list(matches.keys())
             return matches
 
+        return {}
+
     def __str__(self):
         return f"<ZipSeriesImporter {self.series} - {self.description}>"
 
diff --git a/datman/xnat.py b/datman/xnat.py
index 3cfdc7e1..debc93b8 100644
--- a/datman/xnat.py
+++ b/datman/xnat.py
@@ -11,13 +11,28 @@
 import requests
 
 from datman.exceptions import UndefinedSetting, XnatException, InputException
-from datman.importers import XNATSubject, XNATExperiment, XNATScan
+from datman.importers import XNATSubject, XNATExperiment
 
 
 logger = logging.getLogger(__name__)
 
 
-def get_server(config=None, url=None, port=None):
+def get_server(config: 'datman.config.config' = None,
+               url: str = None,
+               port: str = None):
+    """Get correctly formatted XNAT server URL.
+
+    Args:
+        config (:obj:`datman.config.config`, optional): A datman configuration
+            object. Must be provided if url argument is not given.
+        url (:obj:`str`, optional): A server url to use (and possibly
+            re-adjust). Must be provided if config argument is not given.
+        port (:obj:`str`, optional): A string representation of a port to use
+            instead of traditional http/https ports.
+
+    Returns:
+        str: A server url of the expected format.
+    """
     if not config and not url:
         raise XnatException("Can't construct a valid server URL without a "
                             "datman.config.config instance or string url")
@@ -74,34 +89,47 @@ def get_port_str(config=None, port=None):
 
 
 def get_auth(username=None, file_path=None):
+    """Retrieve username and password for XNAT.
+
+    If no inputs are given then the environment variables XNAT_USER and
+    XNAT_PASS will be used.
+
+    Args:
+        username (:obj:`str`, optional): A username to use. If given, the
+            user will be prompted for a password.
+        file_path (:obj:`str`, optional): A path to a credentials file.
+
+    Returns:
+        tuple(str, str): A tuple containing a username and password.
+    """
     if username:
         return (username, getpass.getpass())
 
     if file_path:
         try:
-            with open(file_path, "r") as cred_file:
+            with open(file_path, "r", encoding="utf-8") as cred_file:
                 contents = cred_file.readlines()
         except Exception as e:
             raise XnatException(
                 f"Failed to read credentials file {file_path}. "
-                f"Reason - {e}")
+                f"Reason - {e}") from e
         try:
             username = contents[0].strip()
             password = contents[1].strip()
-        except IndexError:
+        except IndexError as e:
             raise XnatException(
                 f"Failed to read credentials file {file_path} - "
-                "incorrectly formatted.")
+                "incorrectly formatted.") from e
         return (username, password)
 
     try:
         username = os.environ["XNAT_USER"]
     except KeyError:
-        raise KeyError("XNAT_USER not defined in environment")
+        raise KeyError("XNAT_USER not defined in environment") from None
     try:
         password = os.environ["XNAT_PASS"]
     except KeyError:
-        raise KeyError("XNAT_PASS not defined in environment")
+        raise KeyError("XNAT_PASS not defined in environment") from None
 
     return (username, password)
 
@@ -143,7 +171,7 @@ def get_connection(config, site=None, url=None, auth=None, server_cache=None):
     server_url = get_server(url=url)
 
     if auth:
-        connection = xnat(server_url, auth[0], auth[1])
+        connection = XNAT(server_url, auth[0], auth[1])
     else:
         try:
             auth_file = config.get_key("XnatCredentials", site=site)
@@ -155,7 +183,7 @@ def get_connection(config, site=None, url=None, auth=None, server_cache=None):
                 # User probably provided metadata file name only
                 auth_file = os.path.join(config.get_path("meta"), auth_file)
         username, password = get_auth(file_path=auth_file)
-        connection = xnat(server_url, username, password)
+        connection = XNAT(server_url, username, password)
 
     if server_cache is not None:
         server_cache[url] = connection
@@ -163,7 +191,11 @@ def get_connection(config, site=None, url=None, auth=None, server_cache=None):
     return connection
 
 
-class xnat(object):
+# pylint: disable-next=too-many-public-methods
+class XNAT:
+    """Manage a connection to an XNAT server.
+    """
+
     server = None
     auth = None
     headers = None
@@ -178,12 +210,13 @@ def __init__(self, server, username, password):
             self.open_session()
         except Exception as e:
             raise XnatException(
-                f"Failed to open session with server {server}. Reason - {e}")
+                f"Failed to open session with server {server}. Reason - {e}"
+                ) from e
 
     def __enter__(self):
         return self
 
-    def __exit__(self, type, value, traceback):
+    def __exit__(self, *args):
         # Ends the session on the server side
         url = f"{self.server}/data/JSESSION"
         self.session.delete(url)
@@ -197,10 +230,10 @@ def open_session(self):
 
         response = s.post(url, auth=self.auth)
 
-        if not response.status_code == requests.codes.ok:
+        if response.status_code != 200:
             logger.warning(f"Failed connecting to xnat server {self.server} "
                            f"with response code {response.status_code}")
-            logger.debug("Username: {}")
+            logger.debug(f"Username: {self.auth[0]}")
             response.raise_for_status()
 
         # If password is expired, XNAT returns status 200 and a sea of
@@ -242,9 +275,10 @@ def get_projects(self, project=""):
 
         try:
             result = self._make_xnat_query(url)
-        except Exception:
+        except Exception as e:
             raise XnatException(
-                f"Failed getting projects from server with search URL {url}")
+                f"Failed getting projects from server with search URL {url}"
+                ) from e
 
         if not result:
             logger.debug(f"No projects found on server {self.server}")
@@ -264,8 +298,8 @@ def find_project(self, subject_id, projects=None):
                 the search to. Defaults to None.
 
         Returns:
-            str: The name of the XNAT project the subject belongs to. Note:
-                if the same ID is found in more than one project only the
+            str or None: The name of the XNAT project the subject belongs to.
+                Note: if the same ID is found in more than one project only the
                 first match is returned.
         """
         if not projects:
@@ -280,6 +314,7 @@ def find_project(self, subject_id, projects=None):
                 logger.debug(
                     f"Found session {subject_id} in project {project}")
                 return project
+        return None
 
     def get_subject_ids(self, project):
         """Retrieve the IDs for all subjects within an XNAT project.
@@ -303,8 +338,9 @@ def get_subject_ids(self, project):
 
         try:
             result = self._make_xnat_query(url)
-        except Exception:
-            raise XnatException(f"Failed getting xnat subjects with URL {url}")
+        except Exception as e:
+            raise XnatException(f"Failed getting xnat subjects with URL {url}"
+                                ) from e
 
         if not result:
             return []
@@ -312,7 +348,8 @@ def get_subject_ids(self, project):
         try:
             subids = [item["label"] for item in result["ResultSet"]["Result"]]
         except KeyError as e:
-            raise XnatException(f"get_subject_ids - Malformed response. {e}")
+            raise XnatException(f"get_subject_ids - Malformed response. {e}"
+                                ) from None
 
         return subids
 
@@ -340,9 +377,10 @@ def get_subject(self, project, subject_id, create=False):
 
         try:
             result = self._make_xnat_query(url)
-        except Exception:
+        except Exception as e:
             raise XnatException(
-                f"Failed getting subject {subject_id} with URL {url}")
+                f"Failed getting subject {subject_id} with URL {url}"
+                ) from e
 
         if not create and not result:
             raise XnatException(
@@ -355,9 +393,9 @@ def get_subject(self, project, subject_id, create=False):
 
         try:
             subject_json = result["items"][0]
-        except (IndexError, KeyError):
+        except (IndexError, KeyError) as e:
             raise XnatException(
-                f"Could not access metadata for subject {subject_id}")
+                f"Could not access metadata for subject {subject_id}") from e
 
         return XNATSubject(subject_json)
 
@@ -378,7 +416,7 @@ def make_subject(self, project, subject):
         except requests.exceptions.RequestException as e:
             raise XnatException(
                 f"Failed to create xnat subject {subject} in project "
-                f"{project}. Reason - {e}")
+                f"{project}. Reason - {e}") from e
 
     def find_subject(self, project, exper_id):
         """Find the parent subject ID for an experiment.
@@ -398,9 +436,9 @@ def find_subject(self, project, exper_id):
 
         try:
             result = self._make_xnat_query(url)
-        except Exception:
-            XnatException(f"Failed to query XNAT server {project} for "
-                          f"experiment {exper_id}")
+        except Exception as e:
+            raise XnatException(f"Failed to query XNAT server {project} for "
+                                f"experiment {exper_id}") from e
         return result["items"][0]["data_fields"]["subject_ID"]
 
     def get_experiment_ids(self, project, subject=""):
@@ -429,16 +467,17 @@ def get_experiment_ids(self, project, subject=""):
 
         try:
             result = self._make_xnat_query(url)
-        except Exception:
+        except Exception as e:
             raise XnatException(
                 f"Failed getting experiment IDs for subject {subject}"
-                f" with URL {url}")
+                f" with URL {url}") from e
 
         if not result:
             return []
 
         return [item.get("label") for item in result["ResultSet"]["Result"]]
 
+    # pylint: disable-next=too-many-arguments,too-many-positional-arguments
     def get_experiment(self, project, subject_id=None, exper_id=None,
                        create=False, ident=None):
         """Get an experiment from the XNAT server.
@@ -483,8 +522,9 @@ def get_experiment(self, project, subject_id=None, exper_id=None,
 
         try:
             result = self._make_xnat_query(url)
-        except Exception:
-            raise XnatException(f"Failed getting experiment with URL {url}")
+        except Exception as e:
+            raise XnatException(f"Failed getting experiment with URL {url}"
+                                ) from e
 
         if not create and not result:
             raise XnatException(
@@ -499,9 +539,9 @@ def get_experiment(self, project, subject_id=None, exper_id=None,
 
         try:
             exper_json = result["items"][0]
-        except (IndexError, KeyError):
+        except (IndexError, KeyError) as e:
             raise XnatException(
-                f"Could not access metadata for experiment {exper_id}")
+                f"Could not access metadata for experiment {exper_id}") from e
 
         return XNATExperiment(project, subject_id, exper_json, ident=ident)
 
@@ -525,7 +565,7 @@ def make_experiment(self, project, subject, experiment):
         except requests.exceptions.RequestException as e:
             raise XnatException(
                 f"Failed to create XNAT experiment {experiment} under "
-                f"subject {subject} in project {project}. Reason - {e}")
+                f"subject {subject} in project {project}. Reason - {e}") from e
 
     def get_scan_ids(self, project, subject, experiment):
         """Retrieve all scan IDs for an XNAT experiment.
@@ -553,10 +593,10 @@ def get_scan_ids(self, project, subject, experiment):
 
         try:
             result = self._make_xnat_query(url)
-        except Exception:
+        except Exception as e:
             raise XnatException(
                 f"Failed getting scan IDs for experiment {experiment} with "
-                f"URL {url}")
+                f"URL {url}") from e
 
         if not result:
             return []
@@ -566,59 +606,16 @@ def get_scan_ids(self, project, subject, experiment):
                 item.get("ID") for item in result["ResultSet"]["Result"]
             ]
         except KeyError as e:
-            raise XnatException(f"get_scan_ids - Malformed response. {e}")
+            raise XnatException(f"get_scan_ids - Malformed response. {e}"
+                                ) from None
 
         return scan_ids
 
-    def get_scan(self, project, subject_id, exper_id, scan_id):
-        """Get a scan from the XNAT server.
-
-        Args:
-            project (:obj:`str`): The XNAT project to search within.
-            subject_id (:obj:`str`): The XNAT subject to search.
-            exper_id (:obj:`str`): The XNAT experiment to search.
-            scan_id (:obj:`str`): The ID of the scan to retrieve.
-
-        Raises:
-            XnatException: If the scan does not exist or the server/API can't
-                be accessed.
-
-        Returns:
-            :obj:`datman.xnat.XNATScan`: An XNATScan instance matching the
-                scan ID from the given experiment.
-        """
-        logger.debug(
-            f"Querying XNAT server {self.server} for scan {scan_id} in "
-            f"experiment {exper_id} belonging to subject {subject_id} in "
-            f"project {project}")
-
-        url = (
-            f"{self.server}/data/archive/projects/{project}/subject_ids/"
-            f"{subject_id}/exper_ids/{exper_id}/scans/{scan_id}/?format=json")
-
-        try:
-            result = self._make_xnat_query(url)
-        except Exception:
-            raise XnatException(f"Failed getting scan with URL {url}")
-
-        if not result:
-            raise XnatException(
-                f"Scan {scan_id} does not exist for experiment {exper_id} "
-                f"in project {project}")
-
-        try:
-            scan_json = result["items"][0]
-        except (IndexError, KeyError):
-            raise XnatException(
-                f"Could not access metadata for scan {scan_id}")
-
-        return XNATScan(project, subject_id, exper_id, scan_json)
-
     def get_resource_ids(self,
                          study,
                          session,
                          experiment,
-                         folderName=None,
+                         folder_name=None,
                          create=True):
         """
         Return a list of resource id's (subfolders) from an experiment
@@ -629,8 +626,9 @@ def get_resource_ids(self,
                "/resources/?format=json")
         try:
             result = self._make_xnat_query(url)
-        except Exception:
-            raise XnatException(f"Failed getting resource ids with url: {url}")
+        except Exception as e:
+            raise XnatException(f"Failed getting resource ids with url: {url}"
+                                ) from e
         if result is None:
             raise XnatException(
                 f"Experiment: {experiment} not found for session: {session}"
@@ -638,27 +636,26 @@ def get_resource_ids(self,
 
         if create and int(result["ResultSet"]["totalRecords"]) < 1:
             return self.create_resource_folder(study, session, experiment,
-                                               folderName)
+                                               folder_name)
 
         resource_ids = {}
         for r in result["ResultSet"]["Result"]:
             label = r.get("label", "No Label")
             resource_ids[label] = r["xnat_abstractresource_id"]
 
-        if not folderName:
+        if not folder_name:
             # foldername not specified return them all
-            resource_id = [val for val in resource_ids.values()]
+            resource_id = list(resource_ids.values())
         else:
             # check if folder exists, if not create it
             try:
-                resource_id = resource_ids[folderName]
+                resource_id = resource_ids[folder_name]
             except KeyError:
                 # folder doesn't exist, create it
                 if not create:
                     return None
-                else:
-                    resource_id = self.create_resource_folder(
-                        study, session, experiment, folderName)
+                resource_id = self.create_resource_folder(
+                    study, session, experiment, folder_name)
 
         return resource_id
 
@@ -681,8 +678,9 @@ def get_resource_list(self, study, session, experiment, resource_id):
                f"/resources/{resource_id}/?format=xml")
         try:
             result = self._make_xnat_xml_query(url)
-        except Exception:
-            raise XnatException(f"Failed getting resources with url: {url}")
+        except Exception as e:
+            raise XnatException(f"Failed getting resources with url: {url}"
+                                ) from e
         if result is None:
             raise XnatException(
                 f"Experiment: {experiment} not found for session: {session}"
@@ -711,23 +709,23 @@ def put_dicoms(self, project, subject, experiment, filename, retries=3):
 
         try:
             with open(filename, "rb") as data:
-                self._make_xnat_post(upload_url, data, retries, headers)
+                self.make_xnat_post(upload_url, data, retries, headers)
         except XnatException as e:
             e.study = project
             e.session = experiment
             raise e
+        except requests.exceptions.RequestException as e:
+            err = XnatException(f"Error uploading data with url: {upload_url}")
+            err.study = project
+            err.session = experiment
+            raise err from e
         except IOError as e:
             logger.error(
                 f"Failed to open file: {filename} with excuse: {e.strerror}")
             err = XnatException(f"Error in file: {filename}")
             err.study = project
             err.session = experiment
-            raise err
-        except requests.exceptions.RequestException:
-            err = XnatException(f"Error uploading data with url: {upload_url}")
-            err.study = project
-            err.session = experiment
-            raise err
+            raise err from e
 
     def get_dicom(self,
                   project,
@@ -751,34 +749,32 @@ def get_dicom(self,
             os.close(filename[0])
             filename = filename[1]
         try:
-            self._get_xnat_stream(url, filename, retries)
+            self.get_xnat_stream(url, filename, retries)
             return filename
-        except Exception:
+        except Exception as e:
             try:
                 os.remove(filename)
-            except OSError as e:
+            except OSError as exc:
                 logger.warning(f"Failed to delete tempfile: {filename} with "
-                               f"excuse: {str(e)}")
+                               f"excuse: {str(exc)}")
             err = XnatException(f"Failed getting dicom with url: {url}")
             err.study = project
             err.session = session
-            raise err
+            raise err from e
 
-    def put_resource(self,
-                     project,
-                     subject,
-                     experiment,
-                     filename,
-                     data,
-                     folder,
-                     retries=3):
-        """
-        POST a resource file to the xnat server
+    def put_resource(self, project, subject, experiment, filename, data,
+                     folder):
+        """Upload a resource file to the XNAT server.
 
         Args:
-            filename: string to store filename as
-            data: string containing data
-                (such as produced by zipfile.ZipFile.read())
+            project (:obj:`str`): The project to upload to.
+            subject (:obj:`str`): The subject ID to upload to.
+            experiment (:obj:`str`): The experiment ID to upload to.
+            filename (:obj:`str`): The name to store the file under on XNAT.
+            data (bytes): Bytes as produced from reading a file with
+                ZipFile.read
+            folder (:obj:`str`): The folder name to deposit the file in on
+                XNAT.
 
         """
 
@@ -793,7 +789,7 @@ def put_resource(self,
         resource_id = self.get_resource_ids(project,
                                             subject,
                                             experiment,
-                                            folderName=folder)
+                                            folder_name=folder)
 
         uploadname = urllib.parse.quote(filename)
 
@@ -803,17 +799,18 @@ def put_resource(self,
                       f"files/{uploadname}?inbody=true")
 
         try:
-            self._make_xnat_post(attach_url, data)
+            self.make_xnat_post(attach_url, data)
         except XnatException as err:
             err.study = project
             err.session = experiment
             raise err
-        except Exception:
+        except Exception as e:
             logger.warning(
                 f"Failed adding resource to xnat with url: {attach_url}")
             err = XnatException("Failed adding resource to xnat")
             err.study = project
             err.session = experiment
+            raise err from e
 
     def get_resource(
         self,
@@ -844,16 +841,17 @@ def get_resource(
             os.close(filename[0])
             filename = filename[1]
         try:
-            self._get_xnat_stream(url, filename, retries)
+            self.get_xnat_stream(url, filename, retries)
             return filename
-        except Exception:
+        except Exception as e:
             try:
                 os.remove(filename)
-            except OSError as e:
+            except OSError as exc:
                 logger.warning(f"Failed to delete tempfile: {filename} with "
-                               f"exclude: {str(e)}")
+                               f"exclude: {str(exc)}")
             logger.error("Failed getting resource from xnat", exc_info=True)
-            raise XnatException(f"Failed downloading resource with url: {url}")
+            raise XnatException(f"Failed downloading resource with url: {url}"
+                                ) from e
 
     def get_resource_archive(
         self,
@@ -879,36 +877,30 @@ def get_resource_archive(
             os.close(filename[0])
             filename = filename[1]
         try:
-            self._get_xnat_stream(url, filename, retries)
+            self.get_xnat_stream(url, filename, retries)
             return filename
-        except Exception:
+        except Exception as e:
             try:
                 os.remove(filename)
-            except OSError as e:
+            except OSError as exc:
                 logger.warning(f"Failed to delete tempfile: {filename} with "
-                               f"error: {str(e)}")
+                               f"error: {str(exc)}")
             logger.error("Failed getting resource archive from xnat",
                          exc_info=True)
             raise XnatException(
-                f"Failed downloading resource archive with url: {url}")
+                f"Failed downloading resource archive with url: {url}") from e
 
-    def delete_resource(
-        self,
-        project,
-        session,
-        experiment,
-        resource_group_id,
-        resource_id,
-        retries=3,
-    ):
+    def delete_resource(self, project, session, experiment, resource_group_id,
+                        resource_id):
         """Delete a resource file from xnat"""
         url = (f"{self.server}/data/archive/projects/{project}/"
                f"subjects/{session}/experiments/{experiment}/"
                f"resources/{resource_group_id}/files/{resource_id}")
         try:
             self._make_xnat_delete(url)
-        except Exception:
-            raise XnatException(f"Failed deleting resource with url: {url}")
+        except Exception as e:
+            raise XnatException(f"Failed deleting resource with url: {url}"
+                                ) from e
 
     def rename_subject(self, project, old_name, new_name, rename_exp=False):
         """Change a subjects's name on XNAT.
@@ -943,8 +935,8 @@ def rename_subject(self, project, old_name, new_name, rename_exp=False):
         except requests.HTTPError as e:
             if e.response.status_code == 409:
                 raise XnatException(f"Can't rename {old_name} to {new_name}."
-                                    "Subject already exists")
-            elif e.response.status_code == 422:
+                                    "Subject already exists") from None
+            if e.response.status_code == 422:
                 # This is raised every time a subject is renamed.
                 pass
             else:
@@ -953,8 +945,6 @@ def rename_subject(self, project, old_name, new_name, rename_exp=False):
         if rename_exp:
             self.rename_experiment(project, new_name, old_name, new_name)
 
-        return
-
     def rename_experiment(self, project, subject, old_name, new_name):
         """Change an experiment's name on XNAT.
 
@@ -1036,9 +1026,8 @@ def share_subject(self, source_project, source_sub, dest_project,
             if e.response.status_code == 409:
                 raise XnatException(
                     f"Can't share {source_sub} as {dest_sub}, subject "
-                    "ID already exists.")
-            else:
-                raise e
+                    "ID already exists.") from None
+            raise e
 
     def share_experiment(self, source_project, source_sub, source_exp,
                          dest_project, dest_exp):
@@ -1077,9 +1066,8 @@ def share_experiment(self, source_project, source_sub, source_exp,
         except requests.HTTPError as e:
             if e.response.status_code == 409:
                 raise XnatException(f"Can't share {source_exp} as {dest_exp}"
-                                    " experiment ID already exists")
-            else:
-                raise e
+                                    " experiment ID already exists") from None
+            raise e
 
     def dismiss_autorun(self, experiment):
         """Mark the AutoRun.xml pipeline as finished.
@@ -1099,37 +1087,39 @@ def dismiss_autorun(self, experiment):
                            "?wrk:workflowData/status=Complete")
             self._make_xnat_put(dismiss_url)
 
-    def _get_xnat_stream(self, url, filename, retries=3, timeout=300):
+    def get_xnat_stream(self, url, filename, retries=3, timeout=300):
+        """Get large objects from XNAT in a stream.
+        """
         logger.debug(f"Getting {url} from XNAT")
         try:
             response = self.session.get(url, stream=True, timeout=timeout)
         except requests.exceptions.Timeout as e:
             if retries > 0:
-                return self._get_xnat_stream(url,
-                                             filename,
-                                             retries=retries - 1,
-                                             timeout=timeout * 2)
-            else:
-                raise e
+                return self.get_xnat_stream(url,
+                                            filename,
+                                            retries=retries - 1,
+                                            timeout=timeout * 2)
+            raise e
 
         if response.status_code == 401:
             logger.info("Session may have expired, resetting")
             self.open_session()
-            return self._get_xnat_stream(
+            return self.get_xnat_stream(
                     url, filename, retries=retries, timeout=timeout)
 
         if response.status_code == 404:
             logger.info(
                 f"No records returned from xnat server for query: {url}")
-            return
-        elif response.status_code == 504:
+            return None
+
+        if response.status_code == 504:
             if retries:
                 logger.warning("xnat server timed out, retrying")
                 time.sleep(30)
-                self._get_xnat_stream(url,
-                                      filename,
-                                      retries=retries - 1,
-                                      timeout=timeout * 2)
+                self.get_xnat_stream(url,
+                                     filename,
+                                     retries=retries - 1,
+                                     timeout=timeout * 2)
             else:
                 logger.error("xnat server timed out, giving up")
                 response.raise_for_status()
@@ -1143,10 +1133,11 @@ def _get_xnat_stream(self, url, filename, retries=3, timeout=300):
                     f.write(chunk)
             except requests.exceptions.RequestException as e:
                 logger.error("Failed reading from xnat")
-                raise (e)
+                raise e
             except IOError as e:
                 logger.error("Failed writing to file")
-                raise (e)
+                raise e
+        return None
 
     def _make_xnat_query(self, url, retries=3, timeout=150):
         try:
@@ -1156,9 +1147,8 @@ def _make_xnat_query(self, url, retries=3, timeout=150):
                 return self._make_xnat_query(
                     url, retries=retries - 1, timeout=timeout * 2
                 )
-            else:
-                logger.error(f"Xnat server timed out getting url {url}")
-                raise e
+            logger.error(f"Xnat server timed out getting url {url}")
+            raise e
 
         if response.status_code == 401:
             # possibly the session has timed out
@@ -1169,12 +1159,14 @@ def _make_xnat_query(self, url, retries=3, timeout=150):
         if response.status_code == 404:
             logger.info(
                 f"No records returned from xnat server for query: {url}")
-            return
-        elif not response.status_code == requests.codes.ok:
+            return None
+
+        if response.status_code != 200:
             logger.error(f"Failed connecting to xnat server {self.server} "
                          f"with response code {response.status_code}")
             logger.debug("Username: {}")
             response.raise_for_status()
+
         return response.json()
 
     def _make_xnat_xml_query(self, url, retries=3):
@@ -1183,8 +1175,7 @@ def _make_xnat_xml_query(self, url, retries=3):
         except requests.exceptions.Timeout as e:
             if retries > 0:
                 return self._make_xnat_xml_query(url, retries=retries - 1)
-            else:
-                raise e
+            raise e
 
         if response.status_code == 401:
             # possibly the session has timed out
@@ -1194,19 +1185,22 @@ def _make_xnat_xml_query(self, url, retries=3):
 
         if response.status_code == 404:
             logger.info(f"No records returned from xnat server to query {url}")
-            return
-        elif not response.status_code == requests.codes.ok:
+            return None
+        if response.status_code != 200:
             logger.error(f"Failed connecting to xnat server {self.server}"
                          f" with response code {response.status_code}")
-            logger.debug("Username: {}")
+            logger.debug(f"Username: {self.auth[0]}")
             response.raise_for_status()
         root = ElementTree.fromstring(response.content)
         return root
 
     def _make_xnat_put(self, url, retries=3):
+        """Modify XNAT contents.
+        """
         if retries == 0:
-            logger.info(f"Timed out making xnat put {url}")
-            requests.exceptions.HTTPError()
+            raise requests.exceptions.HTTPError(
+                f"Timed out adding data to xnat {url}"
+            )
 
         try:
             response = self.session.put(url, timeout=30)
@@ -1224,8 +1218,11 @@ def _make_xnat_put(self, url, retries=3):
                 f"http client error at folder creation: {response.status_code}"
             )
             response.raise_for_status()
+        return None
 
-    def _make_xnat_post(self, url, data, retries=3, headers=None):
+    def make_xnat_post(self, url, data, retries=3, headers=None):
+        """Add data to XNAT.
+        """
         logger.debug(f"POSTing data to xnat, {retries} retries left")
         response = self.session.post(url,
                                      headers=headers,
@@ -1244,7 +1241,7 @@ def _make_xnat_post(self, url, data, retries=3, headers=None):
             if retries:
                 logger.warning("xnat server timed out, retrying")
                 time.sleep(30)
-                self._make_xnat_post(url, data, retries=retries - 1)
+                self.make_xnat_post(url, data, retries=retries - 1)
             else:
                 logger.warning("xnat server timed out, giving up")
                 response.raise_for_status()
@@ -1258,10 +1255,9 @@ def _make_xnat_post(self, url, data, retries=3, headers=None):
             if "Unable to identify experiment" in reply:
                 raise XnatException("Unable to identify experiment, did "
                                     "dicom upload fail?")
-            else:
-                raise XnatException("An unknown error occured uploading data."
-                                    f"Status code: {response.status_code}, "
-                                    f"reason: {reply}")
+            raise XnatException("An unknown error occured uploading data."
+                                f"Status code: {response.status_code}, "
+                                f"reason: {reply}")
         return reply
 
     def _make_xnat_delete(self, url, retries=3):
@@ -1280,6 +1276,7 @@ def _make_xnat_delete(self, url, retries=3):
             logger.warning(
                 f"http client error deleting resource: {response.status_code}")
             response.raise_for_status()
+        return None
 
     def __str__(self):
         return f"<datman.xnat.xnat {self.server}>"
diff --git a/pyproject.toml b/pyproject.toml
index 7ed546dd..4495cff4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -116,5 +116,12 @@ logging-format-style = "new"
 disable = [
   "logging-format-interpolation",
   "too-many-lines",
-  "logging-fstring-interpolation"
+  "logging-fstring-interpolation",
+  "too-many-instance-attributes",
+  "too-many-arguments",
+  "too-many-positional-arguments",
+  "too-few-public-methods"
 ]
+
+[tool.pylint.format]
+max-line-length = 79
\ No newline at end of file

From f25590442f48e5d0414235e6fc8c2712830ca127 Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Mon, 17 Mar 2025 16:39:57 -0400
Subject: [PATCH 11/45] [FIX] Stop repeated export for NiiLinkExporter

---
 datman/exporters.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/datman/exporters.py b/datman/exporters.py
index 339d8b3d..0cf86d07 100644
--- a/datman/exporters.py
+++ b/datman/exporters.py
@@ -999,6 +999,9 @@ def export(self, *args, **kwargs):
                         f"mapping {self.name_map}")
             return
 
+        if self.outputs_exist():
+            return
+
         self.make_output_dir()
         for dm_name, bids_name in self.name_map.items():
             if bids_name == "missing":
@@ -1407,6 +1410,9 @@ def _get_file(self, fname, ext):
         """
         found = os.path.join(self.nii_path, fname + ext)
         if not os.path.exists(found):
+            bl_found = os.path.join(self.nii_path, 'blacklisted', fname + ext)
+            if os.path.exists(bl_found):
+                return bl_found
             logger.debug(f"File not found {found}")
             return None
         return found

From 0a6d72ca583d031311f1e0751020b76c4fa222b4 Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Mon, 17 Mar 2025 16:40:20 -0400
Subject: [PATCH 12/45] [FIX] Update timeout for uploading zip files

---
 datman/xnat.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/datman/xnat.py b/datman/xnat.py
index 3cfdc7e1..dfae2e5f 100644
--- a/datman/xnat.py
+++ b/datman/xnat.py
@@ -699,7 +699,8 @@ def get_resource_list(self, study, session, experiment, resource_id):
 
         return items
 
-    def put_dicoms(self, project, subject, experiment, filename, retries=3):
+    def put_dicoms(self, project, subject, experiment, filename, retries=3,
+                   timeout=7200):
         """Upload an archive of dicoms to XNAT
         filename: archive to upload"""
         headers = {"Content-Type": "application/zip"}
@@ -711,7 +712,13 @@ def put_dicoms(self, project, subject, experiment, filename, retries=3):
 
         try:
             with open(filename, "rb") as data:
-                self._make_xnat_post(upload_url, data, retries, headers)
+                self._make_xnat_post(upload_url, data, retries=retries,
+                                     headers=headers, timeout=timeout)
+        except requests.exception.Timeout as e:
+            if retries == 1:
+                raise e
+            self.put_dicoms(project, subject, experiment, filename,
+                            retries=retries-1, timeout=timeout+1200)
         except XnatException as e:
             e.study = project
             e.session = experiment
@@ -1225,12 +1232,12 @@ def _make_xnat_put(self, url, retries=3):
             )
             response.raise_for_status()
 
-    def _make_xnat_post(self, url, data, retries=3, headers=None):
+    def _make_xnat_post(self, url, data, retries=3, headers=None, timeout=3600):
         logger.debug(f"POSTing data to xnat, {retries} retries left")
         response = self.session.post(url,
                                      headers=headers,
                                      data=data,
-                                     timeout=60 * 60)
+                                     timeout=timeout)
 
         reply = str(response.content)
 

From 169271b62d99d4aa2370d019dba6e1a8af500724 Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Mon, 17 Mar 2025 17:18:09 -0400
Subject: [PATCH 13/45] [FIX] Attribute referenced before assignment

---
 datman/importers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datman/importers.py b/datman/importers.py
index c6d0bb0f..680fafc9 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -983,11 +983,11 @@ def __init__(self, ident, zip_path):
         self.ident = ident
         self.name = zip_path
         self.path = zip_path
-        self.date = self.scans[0].date
         self.contents = self.parse_contents()
         self.scans = self.get_scans()
         self.resource_files = self.contents['resources']
         self.dcm_subdir = os.path.split(self.scans[0].series_dir)[0]
+        self.date = self.scans[0].date
 
     @property
     def ident(self) -> 'datman.scanid.Identifier':

From 4549a6eed1180830f10211bee4933b494cef917b Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Tue, 18 Mar 2025 12:24:59 -0400
Subject: [PATCH 14/45] [FIX] Fix ZipImporter string and let NiiLinkExporter
 remove broken symlinks

---
 datman/exporters.py | 10 ++++++++++
 datman/importers.py |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/datman/exporters.py b/datman/exporters.py
index 048e07bb..451873db 100644
--- a/datman/exporters.py
+++ b/datman/exporters.py
@@ -1061,6 +1061,16 @@ def make_link(self, dm_file, bids_file):
         for source in glob(bids_file + '*'):
             ext = get_extension(source)
             target = base_target + ext
+
+            if os.path.islink(target) and not os.path.exists(target):
+                # Remove a broken symlink
+                try:
+                    os.unlink(target)
+                except Exception as exc:
+                    logger.error(
+                        f"Failed to remove broken symlink {target} - {exc}")
+                    continue
+
             rel_source = get_relative_source(source, target)
             try:
                 os.symlink(rel_source, target)
diff --git a/datman/importers.py b/datman/importers.py
index 680fafc9..f48954b2 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -1129,7 +1129,7 @@ def get_scans(self) -> list['ZipSeriesImporter']:
         return list(scans.values())
 
     def __str__(self):
-        return f"<ZipImporter {self.path}"
+        return f"<ZipImporter {self.path}>"
 
     def __repr__(self):
         return self.__str__()

From 276e9b33b8917ae7357290bd707d95b35f0e06e2 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Tue, 18 Mar 2025 13:21:19 -0400
Subject: [PATCH 15/45] [FIX] Ensure is_usable is consistent across
 SeriesImporters

---
 datman/importers.py | 70 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 64 insertions(+), 6 deletions(-)

diff --git a/datman/importers.py b/datman/importers.py
index f48954b2..3ba6498c 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -146,6 +146,15 @@ def subject(self) -> str:
         on XNAT).
         """
 
+    @property
+    @abstractmethod
+    def experiment(self) -> str:
+        """The experiment ID of the session this scan belongs to.
+
+        The experiment ID should be the 'full' ID of the session (i.e. with
+        all ID fields included).
+        """
+
     @property
     @abstractmethod
     def description(self) -> str:
@@ -171,8 +180,8 @@ def uid(self) -> str:
         """
 
     @abstractmethod
-    def is_usable(self) -> bool:
-        """Indicates whether the series contains usable dcm files.
+    def raw_dicoms_exist(self) -> bool:
+        """Indicates whether the series contains dicom files.
         """
 
     @abstractmethod
@@ -210,6 +219,37 @@ def is_derived(self) -> bool:
             return True
         return False
 
+    def is_usable(self, strict=False):
+        """Indicates whether the series contains usable dcm files.
+
+        Args:
+            strict (bool, optional): If set, 'derived' scans will be marked
+                unusable.
+        """
+        if not self.raw_dicoms_exist():
+            logger.debug(f"Ignoring {self.series} for {self.experiment}. "
+                         f"No RAW dicoms exist.")
+            return False
+
+        if not self.description:
+            logger.error(f"Can't find description for series {self.series} "
+                         f"from session {self.experiment}.")
+            return False
+
+        if not strict:
+            return True
+
+        if self.is_derived():
+            logger.debug(
+                f"Series {self.series} in session {self.experiment} is a "
+                "derived scan. Ignoring.")
+            return False
+
+        if not self.names:
+            return False
+
+        return True
+
 
 class XNATObject(ABC):
     """A meta class for classes that manage XNAT contents.
@@ -683,6 +723,14 @@ def subject(self) -> str:
     def subject(self, value: str):
         self._subject = value
 
+    @property
+    def experiment(self) -> str:
+        return self._experiment
+
+    @experiment.setter
+    def experiment(self, value: str):
+        self._experiment = value
+
     @property
     def description(self) -> str:
         return self._description
@@ -1109,7 +1157,7 @@ def get_scans(self) -> list['ZipSeriesImporter']:
         for sub_path, header in headers.items():
             # .get_full_subjectid may need to be changed for compatibility
             zip_scan = ZipSeriesImporter(
-                    self.ident.get_full_subjectid(), self.path, sub_path,
+                    self.ident, self.path, sub_path,
                     header, self.contents['scans'][sub_path]
             )
             if zip_scan.series in scans:
@@ -1140,8 +1188,10 @@ class ZipSeriesImporter(SeriesImporter):
     """
 
     # pylint: disable-next=too-many-arguments,too-many-positional-arguments
-    def __init__(self, subject, zip_file, series_dir, header, zip_items):
-        self.subject = subject
+    def __init__(self, ident, zip_file, series_dir, header, zip_items):
+        self.ident = ident
+        self.subject = ident.get_full_subjectid()
+        self.experiment = ident.get_full_subjectid_with_timepoint_session()
         self.zip_file = zip_file
         self.series_dir = series_dir
         self.header = header
@@ -1179,6 +1229,14 @@ def subject(self) -> str:
     def subject(self, value: str):
         self._subject = value
 
+    @property
+    def experiment(self) -> str:
+        return self._experiment
+
+    @experiment.setter
+    def experiment(self, value: str):
+        self._experiment = value
+
     @property
     def description(self) -> str:
         return self._description
@@ -1215,7 +1273,7 @@ def uid(self) -> list[str]:
     def uid(self, value: list[str]):
         self._uid = value
 
-    def is_usable(self):
+    def raw_dicoms_exist(self) -> bool:
         return any(item.endswith(".dcm") for item in self.contents)
 
     def get_files(self, dest_dir: str, *args, **kwargs):

From ac398d859d14bebe2dbc8132bd5e7383cd3fda12 Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Wed, 19 Mar 2025 20:19:04 -0400
Subject: [PATCH 16/45] [FIX] Added required additional attributes

---
 datman/importers.py | 56 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 54 insertions(+), 2 deletions(-)

diff --git a/datman/importers.py b/datman/importers.py
index 3ba6498c..41d25c6c 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -173,12 +173,26 @@ def image_type(self) -> str:
         """The ImageType from the dicom headers.
         """
 
+    @property
+    @abstractmethod
+    def type(self) -> str:
+        """The 'XnatType' or an equivalent (usually equals series description).
+        """
+
     @property
     @abstractmethod
     def uid(self) -> str:
         """The UID from the dicom headers.
         """
 
+    @property
+    @abstractmethod
+    def echo_dict(self) -> dict:
+        """A dictionary mapping each echo to its intended output name.
+
+        This will remain an empty dict if the scan is not multi echo.
+        """
+
     @abstractmethod
     def raw_dicoms_exist(self) -> bool:
         """Indicates whether the series contains dicom files.
@@ -200,6 +214,11 @@ def set_tag(self, tag_map):
         """Set the scan tag for the scan.
         """
 
+    @abstractmethod
+    def is_multiecho(self) -> bool:
+        """Check if the series is multiecho.
+        """
+
     def _mangle_descr(self) -> str:
         """Modify a series description to remove non-alphanumeric characters.
         """
@@ -685,7 +704,7 @@ def __init__(self, experiment, scan_json):
         self.image_type = self.get_field("parameters/imageType")
         self.multiecho = self.is_multiecho()
         self.description = self._set_description()
-        self.type = self.get_field("type")
+        self._type = self.get_field("type")
         self.names = []
         self.echo_dict = {}  # Will remain empty unless scan is multi-echo
         self.tags = []
@@ -763,13 +782,25 @@ def uid(self) -> list[str]:
     def uid(self, value: list[str]):
         self._uid = value
 
+    @property
+    def type(self) -> str:
+        return self._type
+
+    @property
+    def echo_dict(self) -> dict:
+        return self._echo_dict
+
+    @echo_dict.setter
+    def echo_dict(self, value):
+        self._echo_dict = value
+
     def _set_description(self):
         series_descr = self.get_field("series_description")
         if series_descr:
             return series_descr
         return self.get_field("type")
 
-    def is_multiecho(self):
+    def is_multiecho(self) -> bool:
         """Check if the series is multiecho.
         """
         try:
@@ -1204,6 +1235,7 @@ def __init__(self, ident, zip_file, series_dir, header, zip_items):
         self.names = []
         self.tags = []
         self.dcm_dir = None
+        self.echo_dict = {}
 
     @property
     def dcm_dir(self) -> str:
@@ -1273,6 +1305,26 @@ def uid(self) -> list[str]:
     def uid(self, value: list[str]):
         self._uid = value
 
+    @property
+    def type(self) -> str:
+        return self.description
+
+    @property
+    def echo_dict(self) -> dict:
+        return self._echo_dict
+
+    @echo_dict.setter
+    def echo_dict(self, value):
+        self._echo_dict = value
+
+    def is_multiecho(self) -> bool:
+        """Check if the series is multiecho.
+
+        This can't be determined without the configuration files so will
+        be False until set_datman_name() has been called at least once.
+        """
+        return bool(self.echo_dict)
+
     def raw_dicoms_exist(self) -> bool:
         return any(item.endswith(".dcm") for item in self.contents)
 

From cb1ea630ec37c829b8095b285a2b628a11b60701 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Thu, 20 Mar 2025 16:48:11 -0400
Subject: [PATCH 17/45] [FIX] typo when referencing timeout exception

---
 datman/xnat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datman/xnat.py b/datman/xnat.py
index e84d5a38..ad2f995a 100644
--- a/datman/xnat.py
+++ b/datman/xnat.py
@@ -712,7 +712,7 @@ def put_dicoms(self, project, subject, experiment, filename, retries=3,
             with open(filename, "rb") as data:
                 self.make_xnat_post(upload_url, data, retries=retries,
                                     headers=headers, timeout=timeout)
-        except requests.exception.Timeout as e:
+        except requests.exceptions.Timeout as e:
             if retries == 1:
                 raise e
             self.put_dicoms(project, subject, experiment, filename,

From d5c7744822cbccf91472c368b94c1835a1b93ea6 Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Mon, 24 Mar 2025 17:43:43 -0400
Subject: [PATCH 18/45] [FIX] Greatly increase timeout value, update upload
 options

---
 datman/xnat.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/datman/xnat.py b/datman/xnat.py
index ad2f995a..bb7261a2 100644
--- a/datman/xnat.py
+++ b/datman/xnat.py
@@ -698,7 +698,7 @@ def get_resource_list(self, study, session, experiment, resource_id):
         return items
 
     def put_dicoms(self, project, subject, experiment, filename, retries=3,
-                   timeout=7200):
+                   timeout=86400):
         """Upload an archive of dicoms to XNAT
         filename: archive to upload"""
         headers = {"Content-Type": "application/zip"}
@@ -706,7 +706,7 @@ def put_dicoms(self, project, subject, experiment, filename, retries=3,
         upload_url = (
             f"{self.server}/data/services/import?project={project}"
             f"&subject={subject}&session={experiment}&overwrite=delete"
-            "&prearchive=false&inbody=true")
+            "&prearchive=false&Ignore-Unparsable=true&inbody=true")
 
         try:
             with open(filename, "rb") as data:

From dcd98e14784ede89b11c4db753626ecdc742b677 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Mon, 24 Mar 2025 17:46:56 -0400
Subject: [PATCH 19/45] [FIX] Update function reference

---
 bin/dm_xnat_upload.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/dm_xnat_upload.py b/bin/dm_xnat_upload.py
index ef3c6686..e0f7264c 100755
--- a/bin/dm_xnat_upload.py
+++ b/bin/dm_xnat_upload.py
@@ -321,7 +321,7 @@ def check_files_exist(archive, xnat_experiment, xnat):
         logger.error("Failed getting zip file headers for: {}".format(archive))
         return False, False
 
-    xnat_resources = xnat_experiment.get_resources(xnat)
+    xnat_resources = xnat_experiment.get_resource_uris(xnat)
 
     if not local_headers:
         resources_exist = resource_data_exists(xnat_resources, archive)

From 0edbcaccc78d8f27bf9f6276edb4832a6d145f67 Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Mon, 24 Mar 2025 19:15:09 -0400
Subject: [PATCH 20/45] [FIX] Correct errors with zip parsing and alternate
 file types

---
 datman/importers.py | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/datman/importers.py b/datman/importers.py
index 41d25c6c..295312be 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -1166,19 +1166,25 @@ def parse_contents(self) -> dict:
             'resources': []
         }
         with ZipFile(self.path, "r") as fh:
-            par_dir = fh.filelist[0].filename.strip('/')
-            for item in fh.filelist[1:]:
+            for item in fh.filelist:
                 if item.is_dir():
-                    contents['scans'].setdefault(item.filename.strip('/'), [])
-                else:
+                    continue
+
+                if self.is_scan(item.filename):
                     folder, _ = os.path.split(item.filename)
-                    if folder == par_dir:
-                        contents['resources'].append(item.filename)
-                    else:
-                        contents['scans'].setdefault(folder, []).append(
-                            item.filename)
+                    contents['scans'].setdefault(folder, []).append(
+                        item.filename)
+                else:
+                    contents['resources'].append(item.filename)
         return contents
 
+    def is_scan(self, fname):
+        if fname.endswith(".dcm"):
+            return True
+        if fname.endswith(".IMA"):
+            return True
+        return False
+
     def get_scans(self) -> list['ZipSeriesImporter']:
         """Get ZipSeriesImporters for each scan in the session.
         """
@@ -1186,11 +1192,15 @@ def get_scans(self) -> list['ZipSeriesImporter']:
         scans = {}
         duplicate_series = set()
         for sub_path, header in headers.items():
-            # .get_full_subjectid may need to be changed for compatibility
-            zip_scan = ZipSeriesImporter(
-                    self.ident, self.path, sub_path,
-                    header, self.contents['scans'][sub_path]
-            )
+            try:
+                zip_scan = ZipSeriesImporter(
+                        self.ident, self.path, sub_path,
+                        header, self.contents['scans'][sub_path]
+                )
+            except KeyError:
+                logger.error(f"Subdirectory {sub_path} not found in contents for {self.path}.")
+                continue
+
             if zip_scan.series in scans:
                 duplicate_series.add(zip_scan.series)
             else:

From b4006dcdc6a0cf5617561db7e1024ece193934c8 Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Wed, 26 Mar 2025 20:00:48 -0400
Subject: [PATCH 21/45] [FIX] Allow zip parsing when dicoms have no extension

---
 datman/importers.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/datman/importers.py b/datman/importers.py
index 295312be..fa5980de 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -13,6 +13,7 @@
 import os
 import re
 import shutil
+from pathlib import Path
 from zipfile import ZipFile, BadZipFile
 
 from datman.exceptions import ParseException, XnatException
@@ -1179,9 +1180,21 @@ def parse_contents(self) -> dict:
         return contents
 
     def is_scan(self, fname):
-        if fname.endswith(".dcm"):
+        item = Path(fname)
+        ext = item.suffix
+        if ext == ".dcm":
             return True
-        if fname.endswith(".IMA"):
+        if ext.upper() == ".IMA":
+            return True
+        if (item.parent.name.upper() == "DICOM" or
+                item.parent.name.upper() == "SECONDARY"):
+            # Some zip files label their folders 'dicom' but the files
+            # themselves have no extension and are labelled by UID, in which
+            # case 'ext' will look like a floating point number
+            try:
+                float(ext)
+            except ValueError:
+                return False
             return True
         return False
 

From ef8724e3e334bef2c7542e0a47f644ee1d168aa8 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Fri, 25 Apr 2025 12:18:50 -0400
Subject: [PATCH 22/45] [FIX] Update scan importer repr + bids inventory to
 catch error files

Error files needed the session number to be match-able to the
right scan session + bids inventory needs to include error files
to ensure blacklisting removal etc. can correctly handle them.
---
 datman/importers.py | 10 ++++++++--
 datman/scan.py      | 31 +++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/datman/importers.py b/datman/importers.py
index fa5980de..3e9d6d1d 100644
--- a/datman/importers.py
+++ b/datman/importers.py
@@ -270,6 +270,12 @@ def is_usable(self, strict=False):
 
         return True
 
+    @property
+    def str_repr(self):
+        """Provide a consistent repr for all subclasses
+        """
+        return f"{self.experiment} - {self.series}"
+
 
 class XNATObject(ABC):
     """A meta class for classes that manage XNAT contents.
@@ -1049,7 +1055,7 @@ def _fix_download_name(self, output_dir):
                         return
 
     def __str__(self):
-        return f"<XNATScan {self.experiment} - {self.series}>"
+        return f"<XNATScan {self.str_repr}>"
 
     def __repr__(self):
         return self.__str__()
@@ -1396,7 +1402,7 @@ def set_tag(self, tag_map):
         return {}
 
     def __str__(self):
-        return f"<ZipSeriesImporter {self.series} - {self.description}>"
+        return f"<ZipSeriesImporter {self.str_repr}>"
 
     def __repr__(self):
         return self.__str__()
diff --git a/datman/scan.py b/datman/scan.py
index bc8812bd..9a6c1f39 100644
--- a/datman/scan.py
+++ b/datman/scan.py
@@ -9,9 +9,14 @@
 """
 import glob
 import os
+import re
+import logging
 
 import datman.scanid as scanid
 import datman.utils
+from datman.exceptions import ParseException
+
+logger = logging.getLogger(__name__)
 
 
 class DatmanNamed(object):
@@ -197,6 +202,13 @@ def _make_bids_inventory(self):
                 continue
 
             for item in files:
+                if item.endswith(".err"):
+                    err_file = os.path.join(path, item)
+                    ident, series = self._parse_err_file(err_file)
+                    if ident and ident.session == self.session:
+                        inventory.setdefault(series, []).append(err_file)
+                    continue
+
                 if not item.endswith(".json"):
                     continue
 
@@ -220,6 +232,25 @@ def _make_bids_inventory(self):
 
         return inventory
 
+    def _parse_err_file(self, fname):
+        with open(fname, "r") as fh:
+            lines = fh.readlines()
+
+        regex = ".*<.*Importer (.*) - ([0-9]+)>*"
+        match = re.match(regex, lines[0])
+        if not match:
+            logger.error(f"Can't parse error file - {fname}")
+            return None, None
+
+        subid, series = match.groups()
+        try:
+            ident = datman.scan.parse(subid)
+        except ParseException:
+            logger.error(f"Unparseable ID found in error file - {subid}")
+            return None, series
+
+        return ident, series
+
     def get_tagged_nii(self, tag):
         try:
             matched_niftis = self.__nii_dict[tag]

From 81bee314423264105b7f810967ab7be9de3278c3 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Fri, 25 Apr 2025 13:51:44 -0400
Subject: [PATCH 23/45] [FIX] Update references to datman.scanid

At some point in the past I changed the import to scanid but
didn't fully update all references, so I just switched it back
to avoid name reference errors.
---
 datman/scan.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/datman/scan.py b/datman/scan.py
index 9a6c1f39..a6b505c1 100644
--- a/datman/scan.py
+++ b/datman/scan.py
@@ -12,9 +12,8 @@
 import re
 import logging
 
-import datman.scanid as scanid
+import datman.scanid
 import datman.utils
-from datman.exceptions import ParseException
 
 logger = logging.getLogger(__name__)
 
@@ -60,7 +59,7 @@ def __init__(self, path):
         path_minus_ext = path.replace(self.ext, "")
 
         try:
-            ident, tag, series, description = scanid.parse_filename(
+            ident, tag, series, description = datman.scanid.parse_filename(
                 path_minus_ext)
         except datman.scanid.ParseException:
             # re-raise the exception with a more descriptive message
@@ -148,7 +147,7 @@ def niftis(self):
     def _get_ident(self, subid):
         subject_id = self.__check_session(subid)
         try:
-            ident = scanid.parse(subject_id)
+            ident = datman.scanid.parse(subject_id)
         except datman.scanid.ParseException:
             raise datman.scanid.ParseException(
                 f"{subject_id} does not match datman convention")
@@ -245,7 +244,7 @@ def _parse_err_file(self, fname):
         subid, series = match.groups()
         try:
             ident = datman.scan.parse(subid)
-        except ParseException:
+        except datman.scanid.ParseException:
             logger.error(f"Unparseable ID found in error file - {subid}")
             return None, series
 
@@ -260,7 +259,7 @@ def get_tagged_nii(self, tag):
 
     def get_resource_dir(self, session):
         for resource_dir in self.resources:
-            ident = scanid.parse(os.path.basename(resource_dir))
+            ident = datman.scanid.parse(os.path.basename(resource_dir))
             if int(ident.session) != int(session):
                 continue
             if os.path.exists(resource_dir):

From 6f9f270d16452cc4dd0d0dad742edf52c4a958e0 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Fri, 25 Apr 2025 14:00:58 -0400
Subject: [PATCH 24/45] [FIX] Ensure series number is always integer

---
 datman/scan.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/datman/scan.py b/datman/scan.py
index a6b505c1..16f3b5da 100644
--- a/datman/scan.py
+++ b/datman/scan.py
@@ -242,6 +242,8 @@ def _parse_err_file(self, fname):
             return None, None
 
         subid, series = match.groups()
+        series = int(series)
+
         try:
             ident = datman.scan.parse(subid)
         except datman.scanid.ParseException:

From 6ff603cba0d996203072f202fb1d50a21ed96ee2 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Fri, 25 Apr 2025 14:06:05 -0400
Subject: [PATCH 25/45] [FIX] Fix typo in reference to parse function

---
 datman/scan.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datman/scan.py b/datman/scan.py
index 16f3b5da..fa555d58 100644
--- a/datman/scan.py
+++ b/datman/scan.py
@@ -245,7 +245,7 @@ def _parse_err_file(self, fname):
         series = int(series)
 
         try:
-            ident = datman.scan.parse(subid)
+            ident = datman.scanid.parse(subid)
         except datman.scanid.ParseException:
             logger.error(f"Unparseable ID found in error file - {subid}")
             return None, series

From 20902218662743548ee001a57a41e06c9c6ede61 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Fri, 25 Apr 2025 22:12:08 -0400
Subject: [PATCH 26/45] [FIX] Move .err parser to utils, catch blacklisted err
 files

---
 datman/exporters.py | 13 ++++++++++---
 datman/scan.py      | 27 +--------------------------
 datman/utils.py     | 31 +++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 29 deletions(-)

diff --git a/datman/exporters.py b/datman/exporters.py
index 451873db..8f80b175 100644
--- a/datman/exporters.py
+++ b/datman/exporters.py
@@ -29,7 +29,8 @@
                            make_filename, KCNIIdentifier)
 from datman.utils import (run, make_temp_directory, get_extension,
                           filter_niftis, find_tech_notes, read_blacklist,
-                          get_relative_source, read_json, write_json)
+                          get_relative_source, read_json, write_json,
+                          parse_err_file)
 
 
 try:
@@ -245,8 +246,14 @@ def check_contents(self, expected, actual):
                     )
                     if os.path.exists(err_file):
                         continue
-                    else:
-                        missing.setdefault(scan, []).append(out_name)
+
+                    blacklisted_err = os.path.join(
+                        self.output_dir, "blacklisted",
+                        os.path.basename(out_name) + "_niix.err")
+                    if os.path.exists(blacklisted_err):
+                        continue
+
+                    missing.setdefault(scan, []).append(out_name)
                 continue
 
             # Ignore split series, we can't handle these right now.
diff --git a/datman/scan.py b/datman/scan.py
index fa555d58..3f48ad05 100644
--- a/datman/scan.py
+++ b/datman/scan.py
@@ -9,14 +9,10 @@
 """
 import glob
 import os
-import re
-import logging
 
 import datman.scanid
 import datman.utils
 
-logger = logging.getLogger(__name__)
-
 
 class DatmanNamed(object):
     """
@@ -203,7 +199,7 @@ def _make_bids_inventory(self):
             for item in files:
                 if item.endswith(".err"):
                     err_file = os.path.join(path, item)
-                    ident, series = self._parse_err_file(err_file)
+                    ident, series = datman.utils._parse_err_file(err_file)
                     if ident and ident.session == self.session:
                         inventory.setdefault(series, []).append(err_file)
                     continue
@@ -231,27 +227,6 @@ def _make_bids_inventory(self):
 
         return inventory
 
-    def _parse_err_file(self, fname):
-        with open(fname, "r") as fh:
-            lines = fh.readlines()
-
-        regex = ".*<.*Importer (.*) - ([0-9]+)>*"
-        match = re.match(regex, lines[0])
-        if not match:
-            logger.error(f"Can't parse error file - {fname}")
-            return None, None
-
-        subid, series = match.groups()
-        series = int(series)
-
-        try:
-            ident = datman.scanid.parse(subid)
-        except datman.scanid.ParseException:
-            logger.error(f"Unparseable ID found in error file - {subid}")
-            return None, series
-
-        return ident, series
-
     def get_tagged_nii(self, tag):
         try:
             matched_niftis = self.__nii_dict[tag]
diff --git a/datman/utils.py b/datman/utils.py
index 7d225b86..45795747 100644
--- a/datman/utils.py
+++ b/datman/utils.py
@@ -1383,3 +1383,34 @@ def read_json(path):
 def write_json(path, contents):
     with open(path, "w") as fh:
         json.dump(contents, fh, indent=4)
+
+def parse_err_file(fname):
+    """Parse an error file that was generated during extraction.
+
+    Args:
+        fname (:obj:`str`): The full path to an error file.
+
+    Returns:
+        tuple: A tuple of a datman identifier (or None, if a valid ID does
+            not exist in the error file) and an integer series number (for
+            the series that failed to extract).
+    """
+    with open(fname, "r") as fh:
+        lines = fh.readlines()
+
+    regex = ".*<.*Importer (.*) - ([0-9]+)>*"
+    match = re.match(regex, lines[0])
+    if not match:
+        logger.error(f"Can't parse error file - {fname}")
+        return None, None
+
+    subid, series = match.groups()
+    series = int(series)
+
+    try:
+        ident = scanid.parse(subid)
+    except scanid.ParseException:
+        logger.error(f"Unparseable ID found in error file - {subid}")
+        return None, series
+
+    return ident, series
\ No newline at end of file

From 23fae5ef90146318e5fd944c086e419ef80abe4d Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Fri, 25 Apr 2025 22:12:08 -0400
Subject: [PATCH 27/45] [FIX] Move .err parser to utils, catch blacklisted err
 files

---
 datman/exporters.py | 13 ++++++++++---
 datman/scan.py      | 27 +--------------------------
 datman/utils.py     | 31 +++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 29 deletions(-)

diff --git a/datman/exporters.py b/datman/exporters.py
index 451873db..8f80b175 100644
--- a/datman/exporters.py
+++ b/datman/exporters.py
@@ -29,7 +29,8 @@
                            make_filename, KCNIIdentifier)
 from datman.utils import (run, make_temp_directory, get_extension,
                           filter_niftis, find_tech_notes, read_blacklist,
-                          get_relative_source, read_json, write_json)
+                          get_relative_source, read_json, write_json,
+                          parse_err_file)
 
 
 try:
@@ -245,8 +246,14 @@ def check_contents(self, expected, actual):
                     )
                     if os.path.exists(err_file):
                         continue
-                    else:
-                        missing.setdefault(scan, []).append(out_name)
+
+                    blacklisted_err = os.path.join(
+                        self.output_dir, "blacklisted",
+                        os.path.basename(out_name) + "_niix.err")
+                    if os.path.exists(blacklisted_err):
+                        continue
+
+                    missing.setdefault(scan, []).append(out_name)
                 continue
 
             # Ignore split series, we can't handle these right now.
diff --git a/datman/scan.py b/datman/scan.py
index fa555d58..795f0789 100644
--- a/datman/scan.py
+++ b/datman/scan.py
@@ -9,14 +9,10 @@
 """
 import glob
 import os
-import re
-import logging
 
 import datman.scanid
 import datman.utils
 
-logger = logging.getLogger(__name__)
-
 
 class DatmanNamed(object):
     """
@@ -203,7 +199,7 @@ def _make_bids_inventory(self):
             for item in files:
                 if item.endswith(".err"):
                     err_file = os.path.join(path, item)
-                    ident, series = self._parse_err_file(err_file)
+                    ident, series = datman.utils.parse_err_file(err_file)
                     if ident and ident.session == self.session:
                         inventory.setdefault(series, []).append(err_file)
                     continue
@@ -231,27 +227,6 @@ def _make_bids_inventory(self):
 
         return inventory
 
-    def _parse_err_file(self, fname):
-        with open(fname, "r") as fh:
-            lines = fh.readlines()
-
-        regex = ".*<.*Importer (.*) - ([0-9]+)>*"
-        match = re.match(regex, lines[0])
-        if not match:
-            logger.error(f"Can't parse error file - {fname}")
-            return None, None
-
-        subid, series = match.groups()
-        series = int(series)
-
-        try:
-            ident = datman.scanid.parse(subid)
-        except datman.scanid.ParseException:
-            logger.error(f"Unparseable ID found in error file - {subid}")
-            return None, series
-
-        return ident, series
-
     def get_tagged_nii(self, tag):
         try:
             matched_niftis = self.__nii_dict[tag]
diff --git a/datman/utils.py b/datman/utils.py
index 7d225b86..45795747 100644
--- a/datman/utils.py
+++ b/datman/utils.py
@@ -1383,3 +1383,34 @@ def read_json(path):
 def write_json(path, contents):
     with open(path, "w") as fh:
         json.dump(contents, fh, indent=4)
+
+def parse_err_file(fname):
+    """Parse an error file that was generated during extraction.
+
+    Args:
+        fname (:obj:`str`): The full path to an error file.
+
+    Returns:
+        tuple: A tuple of a datman identifier (or None, if a valid ID does
+            not exist in the error file) and an integer series number (for
+            the series that failed to extract).
+    """
+    with open(fname, "r") as fh:
+        lines = fh.readlines()
+
+    regex = ".*<.*Importer (.*) - ([0-9]+)>*"
+    match = re.match(regex, lines[0])
+    if not match:
+        logger.error(f"Can't parse error file - {fname}")
+        return None, None
+
+    subid, series = match.groups()
+    series = int(series)
+
+    try:
+        ident = scanid.parse(subid)
+    except scanid.ParseException:
+        logger.error(f"Unparseable ID found in error file - {subid}")
+        return None, series
+
+    return ident, series
\ No newline at end of file

From 0fb9d85f7635638ad118754fefb75be979fde357 Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Wed, 30 Apr 2025 15:06:03 -0400
Subject: [PATCH 28/45] [FIX] Ensure zip resources extract to same path as XNAT
 resources

---
 bin/dm_xnat_extract.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py
index c672d311..95e5cd45 100755
--- a/bin/dm_xnat_extract.py
+++ b/bin/dm_xnat_extract.py
@@ -626,9 +626,16 @@ def export_resources(resource_dir, xnat, importer, dry_run=False):
             return
 
     if isinstance(importer, datman.importers.ZipImporter):
+        out_dir = os.path.join(resource_dir, "MISC")
+        try:
+            define_folder(out_dir)
+        except OSError:
+            logger.error(f"Failed creating target folder: {out_dir}")
+            return
         for item in importer.resource_files:
-            if not os.path.exists(item):
-                importer.get_resources(resource_dir, item)
+            dest_item = os.path.join(out_dir, item)
+            if not os.path.exists(dest_item):
+                importer.get_resources(out_dir, item)
         return
 
     xnat_experiment = importer

From 82e35d6bf515b00a8f7b8cba604d62f63255dabc Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Wed, 30 Apr 2025 19:09:01 -0400
Subject: [PATCH 29/45] [FIX] Stop index error from happening when no gold
 standards found

---
 bin/dm_qc_report.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/bin/dm_qc_report.py b/bin/dm_qc_report.py
index 8571f012..ba71033e 100755
--- a/bin/dm_qc_report.py
+++ b/bin/dm_qc_report.py
@@ -341,10 +341,15 @@ def update_dashboard(nii_path, header_ignore=None, header_tolerance=None):
     db_record = datman.dashboard.get_scan(nii_path)
 
     if REMAKE or REFRESH or db_record.is_outdated_header_diffs():
+        if db_record.gold_standards:
+            standard = db_record.gold_standards[0]
+        else:
+            standard = None
         try:
             db_record.update_header_diffs(
-                standard=db_record.gold_standards[0],
-                ignore=header_ignore, tolerance=header_tolerance)
+                standard=standard,
+                ignore=header_ignore,
+                tolerance=header_tolerance)
         except Exception as e:
             logger.error(
                 f"Failed generating header diffs for {str(db_record)} due to "

From cdcc264e60937630a6f47eec8634b251103682a8 Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Wed, 21 May 2025 18:22:47 -0400
Subject: [PATCH 30/45] [FIX] Update function call

---
 bin/xnat_fetch_sessions.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/bin/xnat_fetch_sessions.py b/bin/xnat_fetch_sessions.py
index cc7a8263..cbbbf583 100755
--- a/bin/xnat_fetch_sessions.py
+++ b/bin/xnat_fetch_sessions.py
@@ -156,8 +156,7 @@ def download_subjects(xnat, xnat_project, destination):
 
         with datman.utils.make_temp_directory() as temp:
             try:
-                temp_zip = experiment.download(
-                    xnat, temp, zip_name=zip_name)
+                temp_zip = experiment.get_files(temp, xnat, zip_name=zip_name)
             except Exception as e:
                 logger.error("Cant download experiment {}. Reason: {}"
                              "".format(experiment, e))

From cc692e7c5affa4ad4b9333eefb38f4a3ab16c176 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Thu, 22 May 2025 16:56:38 -0400
Subject: [PATCH 31/45] [FIX] Make err file regex more general for XNAT import
 errors

---
 datman/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datman/utils.py b/datman/utils.py
index 45795747..d3935d3c 100644
--- a/datman/utils.py
+++ b/datman/utils.py
@@ -1398,7 +1398,7 @@ def parse_err_file(fname):
     with open(fname, "r") as fh:
         lines = fh.readlines()
 
-    regex = ".*<.*Importer (.*) - ([0-9]+)>*"
+    regex = ".*<.* (.*) - ([0-9]+)>*"
     match = re.match(regex, lines[0])
     if not match:
         logger.error(f"Can't parse error file - {fname}")

From c6b2660348394c4b33114957557a9df2652f0130 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Tue, 3 Jun 2025 19:13:07 -0400
Subject: [PATCH 32/45] [FIX] Started adding a fix for handling repeat sessions

---
 datman/exporters.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/datman/exporters.py b/datman/exporters.py
index 8f80b175..64d42c91 100644
--- a/datman/exporters.py
+++ b/datman/exporters.py
@@ -327,8 +327,31 @@ def get_xnat_parser(self):
         xnat_sidecars = []
         for scan in self.experiment.scans:
             xnat_sidecars.append(FakeSidecar(scan))
+
+        if int(self.session.session) > 1:
+            # Add repeat number to xnat side cars to avoid mistakenly
+            # tagging them as repeat 01
+            for sidecar in xnat_sidecars:
+                sidecar.data['Repeat'] = self.session.session
+
+            # This session is a repeat and files from previous scan(s) must
+            # be included or run numbers will be wrong.
+            for item in self.find_outputs(".json", start_dir=self.output_dir):
+                sidecar = dcm2bids.Sidecar(item)
+                if 'Repeat' not in sidecar.data:
+                    # Assume repeat == 1 if not in json file
+                    xnat_sidecars.append(sidecar)
+                elif int(sidecar.data['Repeat']) < int(self.session.session):
+                    # Avoid duplicating this sessions' previously exported files
+                    xnat_sidecars.append(sidecar)
+
+        # xnat_sidecars = sorted(
+        #     xnat_sidecars, key=lambda x: int(x.data['SeriesNumber'])
+        # )
         xnat_sidecars = sorted(
-            xnat_sidecars, key=lambda x: int(x.data['SeriesNumber'])
+            xnat_sidecars,
+            key=lambda x: (int(x.data['Repeat'] if 'Repeat' in x.data else 1),
+                           int(x.data['SeriesNumber']))
         )
 
         xnat_parser = dcm2bids.SidecarPairing(

From 224addb4e90f45a90136e969592d6da42f416acd Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Fri, 6 Jun 2025 15:52:35 -0400
Subject: [PATCH 33/45] [FIX] Handle expected scans when repeat sessions exist

---
 datman/exporters.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/datman/exporters.py b/datman/exporters.py
index 64d42c91..e82b2937 100644
--- a/datman/exporters.py
+++ b/datman/exporters.py
@@ -342,7 +342,8 @@ def get_xnat_parser(self):
                     # Assume repeat == 1 if not in json file
                     xnat_sidecars.append(sidecar)
                 elif int(sidecar.data['Repeat']) < int(self.session.session):
-                    # Avoid duplicating this sessions' previously exported files
+                    # Include previous sessions' scans without duplicating
+                    # the current sessions' entries.
                     xnat_sidecars.append(sidecar)
 
         # xnat_sidecars = sorted(
@@ -556,7 +557,12 @@ def get_xnat_map(self):
         xnat_parser = self.get_xnat_parser()
         xnat_map = {}
         for acq in xnat_parser.acquisitions:
-            xnat_map.setdefault(acq.srcSidecar.scan, []).append(acq.dstRoot)
+            try:
+                xnat_map.setdefault(acq.srcSidecar.scan, []).append(acq.dstRoot)
+            except AttributeError:
+                # acqs belonging to previous sessions don't have
+                # srcSidecar.scan and should not be in xnat_map
+                pass
         return xnat_map
 
     def get_local_map(self):

From b7aafabe449c93b73783790f3ca778ce69e9643a Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Fri, 27 Jun 2025 13:47:25 -0400
Subject: [PATCH 34/45] [FIX] Address bug when bids split series exists in
 repeat session

---
 datman/exporters.py | 127 ++++++++++++++++++++++++++++++--------------
 1 file changed, 87 insertions(+), 40 deletions(-)

diff --git a/datman/exporters.py b/datman/exporters.py
index e82b2937..ac8c9046 100644
--- a/datman/exporters.py
+++ b/datman/exporters.py
@@ -190,6 +190,8 @@ def __init__(self, config, session, experiment, bids_opts=None, **kwargs):
         self.bids_ses = session._ident.timepoint
         self.repeat = session._ident.session
         self.bids_folder = session.bids_root
+        self.bids_tmp = os.path.join(session.bids_root, "tmp_dcm2bids",
+                                     f"{session.bids_sub}_{session.bids_ses}")
         self.output_dir = session.bids_path
         self.keep_dcm = bids_opts.keep_dcm if bids_opts else False
         self.force_dcm2niix = bids_opts.force_dcm2niix if bids_opts else False
@@ -256,10 +258,36 @@ def check_contents(self, expected, actual):
                     missing.setdefault(scan, []).append(out_name)
                 continue
 
-            # Ignore split series, we can't handle these right now.
-            if len(expected[scan]) != 1:
-                continue
-            if len(actual[scan]) != 1:
+            # Handle split series
+            if len(expected[scan]) > 1:
+                xnat_parser = self.get_xnat_parser()
+                dest_acqs = []
+                for acq in xnat_parser.acquisitions:
+                    try:
+                        found_scan = acq.srcSidecar.scan
+                    except AttributeError:
+                        continue
+                    if found_scan == scan:
+                        dest_acqs.append(acq)
+
+                local_parser = self.get_local_parser()
+                src_acqs = []
+                for acq in local_parser.acquisitions:
+                    sidecar = acq.srcSidecar
+                    if str(sidecar.data['SeriesNumber']) in [scan.series, "10" + scan.series]:
+                        src_acqs.append(acq)
+
+                for src_acq in src_acqs:
+                    found = None
+                    suffix = re.sub(r'_run-\d+', '', src_acq.suffix)
+                    for dst_acq in dest_acqs:
+                        if suffix == re.sub(r'_run-\d+', '', dst_acq.suffix):
+                            found = dst_acq
+                    if not found:
+                        continue
+                    expected_name = found.dstRoot
+                    actual_name = src_acq.srcRoot.replace(self.bids_folder, "")
+                    misnamed[actual_name] = expected_name
                 continue
 
             expected_name = expected[scan][0]
@@ -305,18 +333,29 @@ def write_error_file(self, fname, error_msg):
             )
 
     def fix_run_numbers(self, misnamed_scans):
+        # Rename files already in the subject dir first, to
+        # avoid accidentally clobbering any existing misnamed files
+        # with os.rename
         for orig_name in misnamed_scans:
-            source_path = os.path.join(self.bids_folder, orig_name)
-            dest_path = os.path.join(
-                self.bids_folder, misnamed_scans[orig_name]
-            )
+            if not orig_name.startswith("sub-"):
+                continue
+            self.rename_scan(orig_name, misnamed_scans[orig_name])
+
+        for orig_name in misnamed_scans:
+            if not orig_name.startswith("tmp_dcm2bids"):
+                continue
+            self.rename_scan(orig_name, misnamed_scans[orig_name])
+
+    def rename_scan(self, orig_name, dest_name):
+        source_path = os.path.join(self.bids_folder, orig_name)
+        dest_path = os.path.join(self.bids_folder, dest_name)
 
-            if not os.path.exists(os.path.dirname(dest_path)):
-                os.makedirs(os.path.dirname(dest_path))
+        if not os.path.exists(os.path.dirname(dest_path)):
+            os.makedirs(os.path.dirname(dest_path))
 
-            for found in glob(source_path + "*"):
-                _, ext = datman.utils.splitext(found)
-                os.rename(found, dest_path + ext)
+        for found in glob(source_path + "*"):
+            _, ext = datman.utils.splitext(found)
+            os.rename(found, dest_path + ext)
 
     def get_xnat_parser(self):
         participant = dcm2bids.Participant(
@@ -328,11 +367,11 @@ def get_xnat_parser(self):
         for scan in self.experiment.scans:
             xnat_sidecars.append(FakeSidecar(scan))
 
-        if int(self.session.session) > 1:
+        if int(self.repeat) > 1:
             # Add repeat number to xnat side cars to avoid mistakenly
             # tagging them as repeat 01
             for sidecar in xnat_sidecars:
-                sidecar.data['Repeat'] = self.session.session
+                sidecar.data['Repeat'] = self.repeat
 
             # This session is a repeat and files from previous scan(s) must
             # be included or run numbers will be wrong.
@@ -341,7 +380,7 @@ def get_xnat_parser(self):
                 if 'Repeat' not in sidecar.data:
                     # Assume repeat == 1 if not in json file
                     xnat_sidecars.append(sidecar)
-                elif int(sidecar.data['Repeat']) < int(self.session.session):
+                elif int(sidecar.data['Repeat']) < int(self.repeat):
                     # Include previous sessions' scans without duplicating
                     # the current sessions' entries.
                     xnat_sidecars.append(sidecar)
@@ -381,16 +420,17 @@ def get_local_parser(self):
 
         bids_conf = dcm2bids.load_json(self.dcm2bids_config)
 
-        bids_tmp = os.path.join(
-            self.bids_folder,
-            "tmp_dcm2bids",
-            f"{self.session.bids_sub}_{self.session.bids_ses}"
-        )
-
         local_sidecars = []
-        for search_path in [self.output_dir, bids_tmp]:
+        for search_path in [self.output_dir, self.bids_tmp]:
             for item in self.find_outputs(".json", start_dir=search_path):
-                local_sidecars.append(dcm2bids.Sidecar(item))
+                sidecar = dcm2bids.Sidecar(item)
+                if ('Repeat' in sidecar.data and
+                        sidecar.data['Repeat'] == self.repeat):
+                    local_sidecars.append(sidecar)
+                elif ('Repeat' not in sidecar.data and self.repeat == '01'):
+                    # Assume untagged sidecars all belong to the first session
+                    local_sidecars.append(sidecar)
+
         local_sidecars = sorted(local_sidecars)
 
         parser = dcm2bids.SidecarPairing(
@@ -405,12 +445,7 @@ def get_local_parser(self):
     def _get_scan_dir(self, download_dir):
         if self.refresh:
             # Use existing tmp_dir instead of raw dcms
-            tmp_dir = os.path.join(
-                self.bids_folder,
-                "tmp_dcm2bids",
-                f"sub-{self.bids_sub}_ses-{self.bids_ses}"
-            )
-            return tmp_dir
+            return self.bids_tmp
         return os.path.join(download_dir, self.dcm_dir)
 
     def outputs_exist(self):
@@ -451,6 +486,11 @@ def export(self, raw_data_dir, **kwargs):
             logger.info(f"Dry run: Skipping bids export to {self.output_dir}")
             return
 
+        # Store user settings in case they change during export
+        orig_force = self.force_dcm2niix
+        orig_refresh = self.refresh
+
+
         if int(self.repeat) > 1:
             # Must force dcm2niix export if it's a repeat.
             self.force_dcm2niix = True
@@ -460,7 +500,7 @@ def export(self, raw_data_dir, **kwargs):
         try:
             self.run_dcm2bids(raw_data_dir)
         except Exception as e:
-            print(f"Failed to extract data. {e}")
+            logger.error(f"Failed to extract data. {e}")
 
         try:
             self.add_repeat_num()
@@ -471,6 +511,18 @@ def export(self, raw_data_dir, **kwargs):
                 "incorrectly be tagged as belonging to the later repeat."
             )
 
+        if int(self.repeat) > 1:
+            # Must run a second time to move the new niftis out of the tmp dir
+            self.force_dcm2niix = False
+            self.refresh = True
+            try:
+                self.run_dcm2bids(raw_data_dir)
+            except Exception as e:
+                logger.error(f"Failed to extract data. {e}")
+
+        self.force_dcm2niix = orig_force
+        self.refresh = orig_refresh
+
     def run_dcm2bids(self, raw_data_dir, tries=2):
         if tries == 0:
             logger.error(f"Dcm2bids failed to run for {self.output_dir}.")
@@ -573,7 +625,7 @@ def get_local_map(self):
         for acq in local_parser.acquisitions:
             sidecar = acq.srcSidecar
             if ('Repeat' in sidecar.data and
-                    sidecar.data['Repeat'] != self.session.session):
+                    sidecar.data['Repeat'] != self.repeat):
                 continue
             if 'SeriesNumber' not in sidecar.data:
                 continue
@@ -640,6 +692,7 @@ def find_outputs(self, ext, start_dir=None):
 
     def get_sidecars(self):
         sidecars = self.find_outputs(".json")
+        sidecars.extend(self.find_outputs(".json", start_dir=self.bids_tmp))
         contents = {path: read_json(path) for path in sidecars}
         return contents
 
@@ -702,18 +755,12 @@ def remove_criteria(descriptions):
 
         bids_conf = dcm2bids.load_json(self.dcm2bids_config)
 
-        bids_tmp = os.path.join(
-            self.bids_folder,
-            "tmp_dcm2bids",
-            f"{self.session.bids_sub}_{self.session.bids_ses}"
-        )
-
         local_sidecars = []
-        for search_path in [self.output_dir, bids_tmp]:
+        for search_path in [self.output_dir, self.bids_tmp]:
             for item in self.find_outputs(".json", start_dir=search_path):
                 sidecar = dcm2bids.Sidecar(item)
                 if ('Repeat' in sidecar.data and
-                        sidecar.data['Repeat'] != self.session.session):
+                        sidecar.data['Repeat'] != self.repeat):
                     continue
                 local_sidecars.append(sidecar)
         local_sidecars = sorted(local_sidecars)

From 755b5dbbbd5f311bbba9a710c087418771f20fbf Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Fri, 27 Jun 2025 18:55:07 -0400
Subject: [PATCH 35/45] [FIX] Add check for direction as well, if configured

---
 datman/exporters.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/datman/exporters.py b/datman/exporters.py
index ac8c9046..617a4fb8 100644
--- a/datman/exporters.py
+++ b/datman/exporters.py
@@ -1005,6 +1005,7 @@ def _find_matching_files(self, bids_names, bids_conf):
         matches = self._filter_bids(
             matches, bids_conf.get(self._get_label_key(bids_conf)))
         matches = self._filter_bids(matches, bids_conf.get('task'))
+        matches = self._filter_bids(matches, bids_conf.get('dir'))
         # The below is used to more accurately match FMAP tags
         matches = self._filter_bids(matches, bids_conf.get('match_acq'))
         return matches

From c45ab66aea99c14a1e57482a2f728b9bbbb12235 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Fri, 25 Jul 2025 15:58:17 -0400
Subject: [PATCH 36/45] [REF] Reorganize the exporters into submodules

To prep for multiple versions of dcm2bids I've divided up
the exporters and the bids classes will now be imported based on
user environment.
---
 datman/exporters/__init__.py                  | 102 +++
 datman/exporters/base.py                      | 122 +++
 .../bids_legacy.py}                           | 794 +-----------------
 datman/exporters/dashboard.py                 | 399 +++++++++
 datman/exporters/legacy.py                    | 225 +++++
 5 files changed, 875 insertions(+), 767 deletions(-)
 create mode 100644 datman/exporters/__init__.py
 create mode 100644 datman/exporters/base.py
 rename datman/{exporters.py => exporters/bids_legacy.py} (60%)
 create mode 100644 datman/exporters/dashboard.py
 create mode 100644 datman/exporters/legacy.py

diff --git a/datman/exporters/__init__.py b/datman/exporters/__init__.py
new file mode 100644
index 00000000..63c62252
--- /dev/null
+++ b/datman/exporters/__init__.py
@@ -0,0 +1,102 @@
+import os
+import importlib
+import pkgutil
+import logging
+
+from .base import Exporter, SessionExporter, SeriesExporter
+
+logger = logging.getLogger(__name__)
+
+# Exclude bids from import until it's known which (if any) version of
+# dcm2bids is in use
+_exclude = {"bids", "bids_legacy"}
+
+__all__ = []
+
+
+def _load_contents(module_name):
+    """Load the contents of a module file in the 'exporters' folder.
+    """
+    module = importlib.import_module(f".{module_name}", package=__name__)
+
+    if hasattr(module, "__all__"):
+        contents = module.__all__
+    else:
+        contents = [item for item in dir(module) if not item.startswith("_")]
+
+    for item in contents:
+        globals()[item] = getattr(module, item)
+
+    __all__.extend(contents)
+
+
+# Load everything from exporters folder (except bids exporters) so contents
+# can be accessed as 'datman.exporters' instead of 'datman.exporters.xxx'
+for _, module_name, _ in pkgutil.iter_modules([os.path.dirname(__file__)]):
+    if module_name in _exclude:
+        continue
+    _load_contents(module_name)
+
+# Load the appropriate version of the bids exporters (if any)
+DCM2BIDS_FOUND = False
+
+if os.getenv("BIDS_CONTAINER"):
+    # Container is in use, load bids.py
+    _load_contents("bids")
+    DCM2BIDS_FOUND = True
+else:
+    try:
+        from dcm2bids import dcm2bids, Dcm2bids
+    except ImportError:
+        # dcm2bids is either not installed or version >= 3
+        try:
+            import dcm2bids
+        except ImportError:
+            # No dcm2bids available at all
+            DCM2BIDS_FOUND = False
+        else:
+            # dcm2bids is installed and version >= 3, use bids.py
+            _load_contents("bids")
+            DCM2BIDS_FOUND = True
+    else:
+        # dcm2bids is installed and version < 3, use bids_legacy.py
+        _load_contents("bids_legacy")
+        DCM2BIDS_FOUND = True
+
+
+def get_exporter(key: str, scope="series") -> Exporter:
+    """Find an exporter class for a given key identifier.
+
+    Args:
+        key (:obj:`str`): The 'type' identifier of a defined exporter (e.g.
+            'nii').
+        scope (:obj:`str`, optional): Whether to search for a series or session
+            exporter. Defaults to 'series'.
+
+    Returns:
+        :obj:`datman.exporters.base.Exporter`: The Exporter subclass
+            if one is defined, or else None.
+    """
+    if scope == "series":
+        exp_set = SERIES_EXPORTERS
+    else:
+        exp_set = SESSION_EXPORTERS
+
+    try:
+        exporter = exp_set[key]
+    except KeyError:
+        logger.error(
+            f"Unrecognized format {key} for {scope}, no exporters found.")
+        return None
+    return exporter
+
+
+SESSION_EXPORTERS = {
+    exp.type: exp for exp in SessionExporter.__subclasses__()
+}
+
+SERIES_EXPORTERS = {
+    exp.type: exp for exp in SeriesExporter.__subclasses__()
+}
+
+__all__.extend(["get_exporter", "SESSION_EXPORTERS", "SERIES_EXPORTERS"])
diff --git a/datman/exporters/base.py b/datman/exporters/base.py
new file mode 100644
index 00000000..6cd81237
--- /dev/null
+++ b/datman/exporters/base.py
@@ -0,0 +1,122 @@
+"""Base classes to use for any datman exporter.
+
+To allow datman to export to a new format or organizational style create a
+class that inherits from either SessionExporter if it must work on an entire
+scan session at once, or a SeriesExporter if it works on a single individual
+scan series at a time.
+"""
+
+from abc import ABC, abstractmethod
+import os
+import logging
+
+logger = logging.getLogger(__name__)
+
+__all__ = ["SeriesExporter", "SessionExporter"]
+
+
+class Exporter(ABC):
+    """An abstract base class for all Exporters.
+    """
+
+    # Subclasses must define this
+    type = None
+
+    @classmethod
+    def get_output_dir(cls, session):
+        """Retrieve the exporter's output dir without needing an instance.
+        """
+        return getattr(session, f"{cls.type}_path")
+
+    @abstractmethod
+    def outputs_exist(self):
+        """Whether outputs have already been generated for this Exporter.
+
+        Returns:
+            bool: True if all expected outputs exist, False otherwise.
+        """
+
+    @abstractmethod
+    def needs_raw_data(self):
+        """Whether raw data must be downloaded for the Exporter.
+
+        Returns:
+            bool: True if raw data must be given, False otherwise. Note that
+                False may be returned if outputs already exist.
+        """
+
+    @abstractmethod
+    def export(self, raw_data_dir, **kwargs):
+        """Exports raw data to the current Exporter's format.
+
+        Args:
+            raw_data_dir (:obj:`str`): The directory that contains the
+                downloaded raw data.
+        """
+
+    def make_output_dir(self):
+        """Creates the directory where the Exporter's outputs will be stored.
+
+        Returns:
+            bool: True if directory exists (or isn't needed), False otherwise.
+        """
+        try:
+            os.makedirs(self.output_dir)
+        except FileExistsError:
+            pass
+        except AttributeError:
+            logger.debug(f"output_dir not defined for {self}")
+        except PermissionError:
+            logger.error(f"Failed to make output dir {self.output_dir} - "
+                         "PermissionDenied.")
+            return False
+        return True
+
+
+class SessionExporter(Exporter):
+    """A base class for exporters that take an entire session as input.
+
+    Subclasses should override __init__ (without changing basic input args)
+    and call super().__init__(config, session, experiment, **kwargs).
+
+    The init function for SessionExporter largely exists to define expected
+    input arguments and set some universally needed attributes.
+    """
+
+    def __init__(self, config, session, experiment, dry_run=False, **kwargs):
+        self.experiment = experiment
+        self.config = config
+        self.session = session
+        self.dry_run = dry_run
+
+    def __repr__(self):
+        fq_name = str(self.__class__).replace("<class '", "").replace("'>", "")
+        name = fq_name.rsplit(".", maxsplit=1)[-1]
+        return f"<{name} - {self.experiment.name}>"
+
+
+class SeriesExporter(Exporter):
+    """A base class for exporters that take a single series as input.
+    """
+
+    # Subclasses should set this
+    ext = None
+
+    def __init__(self, output_dir, fname_root, echo_dict=None, dry_run=False,
+                 **kwargs):
+        self.output_dir = output_dir
+        self.fname_root = fname_root
+        self.echo_dict = echo_dict
+        self.dry_run = dry_run
+
+    def outputs_exist(self):
+        return os.path.exists(
+            os.path.join(self.output_dir, self.fname_root + self.ext))
+
+    def needs_raw_data(self):
+        return not self.outputs_exist()
+
+    def __repr__(self):
+        fq_name = str(self.__class__).replace("<class '", "").replace("'>", "")
+        name = fq_name.rsplit(".", maxsplit=1)[-1]
+        return f"<{name} - {self.fname_root}>"
diff --git a/datman/exporters.py b/datman/exporters/bids_legacy.py
similarity index 60%
rename from datman/exporters.py
rename to datman/exporters/bids_legacy.py
index 617a4fb8..ab9d06d1 100644
--- a/datman/exporters.py
+++ b/datman/exporters/bids_legacy.py
@@ -1,181 +1,34 @@
-"""Functions to export data into different file formats and organizations.
+"""Export to bids format when using dcm2bids versions below '3'.
 
-To allow datman to export to a new format make a subclass of SessionExporter
-or SeriesExporter depending on whether the new format requires data from
-a complete scan session or a single series, respectively. The new subclass
-should implement all abstract methods, including 'export' which does the
-actual work of generating outputs.
+For dcm2bids versions 3 and higher (or dcm2bids versions accessed via
+container) 'dcm2bids', 'Dcm2bids' and 'Acquisition' are not accessible so these
+exporters cannot be used.
 
-Also, ensure that subclasses define the 'type' attribute to be a short
-unique key that can be referenced in config files (e.g. 'nii').
+When using versions below '3' though, this exporter has advantages over the
+newer one. Namely, its outputs_exist() method can better check the actual
+contents of the folder against what we expect to have been exported (reducing
+manual intervention). It can also force dcm2bids to properly export repeat
+sessions into the same folder, where newer versions will simply ignore them.
 """
-from abc import ABC, abstractmethod
 from collections import OrderedDict
-from datetime import datetime
 from glob import glob
 from json import JSONDecodeError
 import logging
 import os
 import re
 
-import pydicom as dicom
-
-import datman.config
-import datman.dashboard
-import datman.scan
-from datman.exceptions import (UndefinedSetting, DashboardException,
-                               ConfigException)
-from datman.scanid import (parse_bids_filename, ParseException,
-                           make_filename, KCNIIdentifier)
-from datman.utils import (run, make_temp_directory, get_extension,
-                          filter_niftis, find_tech_notes, read_blacklist,
-                          get_relative_source, read_json, write_json,
-                          parse_err_file)
-
-
-try:
-    from dcm2bids import dcm2bids, Dcm2bids
-    from dcm2bids.sidecar import Acquisition
-except ImportError:
-    DCM2BIDS_FOUND = False
-else:
-    DCM2BIDS_FOUND = True
-
-logger = logging.getLogger(__name__)
-
-
-def get_exporter(key, scope="series"):
-    """Find an exporter class for a given key identifier.
-
-    Args:
-        key (:obj:`str`): The 'type' identifier of a defined exporter (e.g.
-            'nii').
-        scope (:obj:`str`, optional): Whether to search for a series or session
-            exporter. Defaults to 'series'.
-
-    Returns:
-        :obj:`datman.exporters.Exporter`: The Exporter subclass for the type,
-            if one is defined, or else None.
-    """
-    if scope == "series":
-        exp_set = SERIES_EXPORTERS
-    else:
-        exp_set = SESSION_EXPORTERS
-
-    try:
-        exporter = exp_set[key]
-    except KeyError:
-        logger.error(
-            f"Unrecognized format {key} for {scope}, no exporters found.")
-        return None
-    return exporter
-
-
-class Exporter(ABC):
-    """An abstract base class for all Exporters.
-    """
-
-    # Subclasses must define this
-    type = None
-
-    @classmethod
-    def get_output_dir(cls, session):
-        """Retrieve the exporter's output dir without needing an instance.
-        """
-        return getattr(session, f"{cls.type}_path")
-
-    @abstractmethod
-    def outputs_exist(self):
-        """Whether outputs have already been generated for this Exporter.
-
-        Returns:
-            bool: True if all expected outputs exist, False otherwise.
-        """
-
-    @abstractmethod
-    def needs_raw_data(self):
-        """Whether raw data must be downloaded for the Exporter.
-
-        Returns:
-            bool: True if raw data must be given, False otherwise. Note that
-                False may be returned if outputs already exist.
-        """
-
-    @abstractmethod
-    def export(self, raw_data_dir, **kwargs):
-        """Exports raw data to the current Exporter's format.
-
-        Args:
-            raw_data_dir (:obj:`str`): The directory that contains the
-                downloaded raw data.
-        """
-
-    def make_output_dir(self):
-        """Creates the directory where the Exporter's outputs will be stored.
-
-        Returns:
-            bool: True if directory exists (or isn't needed), False otherwise.
-        """
-        try:
-            os.makedirs(self.output_dir)
-        except FileExistsError:
-            pass
-        except AttributeError:
-            logger.debug(f"output_dir not defined for {self}")
-        except PermissionError:
-            logger.error(f"Failed to make output dir {self.output_dir} - "
-                         "PermissionDenied.")
-            return False
-        return True
+from datman.scanid import make_filename
+from datman.utils import (splitext, get_extension, write_json, read_json,
+                          filter_niftis, read_blacklist, get_relative_source)
 
+from dcm2bids import dcm2bids, Dcm2bids
+from dcm2bids.sidecar import Acquisition
 
-class SessionExporter(Exporter):
-    """A base class for exporters that take an entire session as input.
+from .base import SessionExporter
 
-    Subclasses should override __init__ (without changing basic input args)
-    and call super().__init__(config, session, experiment, **kwargs).
-
-    The init function for SessionExporter largely exists to define expected
-    input arguments and set some universally needed attributes.
-    """
-
-    def __init__(self, config, session, experiment, dry_run=False, **kwargs):
-        self.experiment = experiment
-        self.config = config
-        self.session = session
-        self.dry_run = dry_run
-
-    def __repr__(self):
-        fq_name = str(self.__class__).replace("<class '", "").replace("'>", "")
-        name = fq_name.rsplit(".", maxsplit=1)[-1]
-        return f"<{name} - {self.experiment.name}>"
-
-
-class SeriesExporter(Exporter):
-    """A base class for exporters that take a single series as input.
-    """
-
-    # Subclasses should set this
-    ext = None
-
-    def __init__(self, output_dir, fname_root, echo_dict=None, dry_run=False,
-                 **kwargs):
-        self.output_dir = output_dir
-        self.fname_root = fname_root
-        self.echo_dict = echo_dict
-        self.dry_run = dry_run
-
-    def outputs_exist(self):
-        return os.path.exists(
-            os.path.join(self.output_dir, self.fname_root + self.ext))
-
-    def needs_raw_data(self):
-        return not self.outputs_exist()
+logger = logging.getLogger(__name__)
 
-    def __repr__(self):
-        fq_name = str(self.__class__).replace("<class '", "").replace("'>", "")
-        name = fq_name.rsplit(".", maxsplit=1)[-1]
-        return f"<{name} - {self.fname_root}>"
+__all__ = ["BidsExporter", "NiiLinkExporter"]
 
 
 class BidsExporter(SessionExporter):
@@ -274,7 +127,8 @@ def check_contents(self, expected, actual):
                 src_acqs = []
                 for acq in local_parser.acquisitions:
                     sidecar = acq.srcSidecar
-                    if str(sidecar.data['SeriesNumber']) in [scan.series, "10" + scan.series]:
+                    if str(sidecar.data['SeriesNumber']) in [
+                            scan.series, "10" + scan.series]:
                         src_acqs.append(acq)
 
                 for src_acq in src_acqs:
@@ -354,7 +208,7 @@ def rename_scan(self, orig_name, dest_name):
             os.makedirs(os.path.dirname(dest_path))
 
         for found in glob(source_path + "*"):
-            _, ext = datman.utils.splitext(found)
+            _, ext = splitext(found)
             os.rename(found, dest_path + ext)
 
     def get_xnat_parser(self):
@@ -477,10 +331,11 @@ def export(self, raw_data_dir, **kwargs):
         if self.outputs_exist():
             return
 
-        if not DCM2BIDS_FOUND:
-            logger.info(f"Unable to export to {self.output_dir}, "
-                        "Dcm2Bids not found.")
-            return
+        # Was this ever needed? The class should never have been made.
+        # if not DCM2BIDS_FOUND:
+        #     logger.info(f"Unable to export to {self.output_dir}, "
+        #                 "Dcm2Bids not found.")
+        #     return
 
         if self.dry_run:
             logger.info(f"Dry run: Skipping bids export to {self.output_dir}")
@@ -490,7 +345,6 @@ def export(self, raw_data_dir, **kwargs):
         orig_force = self.force_dcm2niix
         orig_refresh = self.refresh
 
-
         if int(self.repeat) > 1:
             # Must force dcm2niix export if it's a repeat.
             self.force_dcm2niix = True
@@ -610,7 +464,8 @@ def get_xnat_map(self):
         xnat_map = {}
         for acq in xnat_parser.acquisitions:
             try:
-                xnat_map.setdefault(acq.srcSidecar.scan, []).append(acq.dstRoot)
+                xnat_map.setdefault(acq.srcSidecar.scan, []).append(
+                    acq.dstRoot)
             except AttributeError:
                 # acqs belonging to previous sessions don't have
                 # srcSidecar.scan and should not be in xnat_map
@@ -1164,601 +1019,6 @@ def make_link(self, dm_file, bids_file):
                 logger.error(f"Failed to create {target}. Reason - {exc}")
 
 
-class DBExporter(SessionExporter):
-    """Add a datman-style session and its contents to datman's QC dashboard.
-    """
-
-    type = "db"
-
-    def __init__(self, config, session, experiment, **kwargs):
-        try:
-            study_resource_dir = config.get_path("resources")
-        except UndefinedSetting:
-            study_resource_dir = ""
-
-        try:
-            resources_dir = os.path.join(
-                config.get_path("resources"),
-                session._ident.get_full_subjectid_with_timepoint_session()
-            )
-        except UndefinedSetting:
-            resources_dir = ""
-
-        self.nii_path = session.nii_path
-        self.output_dir = None
-        self.ident = session._ident
-        self.study_resource_path = study_resource_dir
-        self.resources_path = resources_dir
-        self.date = experiment.date
-        super().__init__(config, session, experiment, **kwargs)
-
-    @property
-    def names(self):
-        """Gets list of valid datman-style scan names for a session.
-
-        Returns:
-            :obj:`dict`: A dictionary of datman style scan names mapped to
-                the bids style name if one can be found, otherwise, an
-                empty string.
-        """
-        names = {}
-        # use experiment.scans, so dashboard can report scans that didnt export
-        for scan in self.experiment.scans:
-            for name in scan.names:
-                names[name] = self.get_bids_name(name, self.session)
-
-        # Check the actual folder contents as well, in case symlinked scans
-        # exist that werent named on XNAT
-        for nii in self.session.niftis:
-            fname = nii.file_name.replace(nii.ext, "")
-            if fname in names:
-                continue
-            names[fname] = self.get_bids_name(fname, self.session)
-
-        return names
-
-    def get_bids_name(self, dm_name, session):
-        """Get BIDS style scan name from a datman style nifti.
-
-        Returns:
-            str: A valid bids style file name or an empty string if one
-                cannot be found.
-        """
-        found = [item for item in session.find_files(dm_name)
-                 if ".nii.gz" in item]
-        if not found or not os.path.islink(found[0]):
-            return ""
-        bids_src = os.readlink(found[0])
-        bids_name = os.path.basename(bids_src)
-        return bids_name.replace(get_extension(bids_name), "")
-
-    def export(self, *args, **kwargs):
-        if self.dry_run:
-            logger.info("Dry run: Skipping database update for "
-                        f"{str(self.ident)}")
-            return
-
-        if not datman.dashboard.dash_found:
-            logger.warning("Dashboard database not found, unable to add "
-                           f"{str(self.ident)} and its contents.")
-            return
-
-        session = self.make_session()
-
-        if not session.tech_notes and session.expects_notes():
-            self.add_tech_notes(session)
-
-        for file_stem in self.names:
-            self.make_scan(file_stem)
-
-    def outputs_exist(self):
-        try:
-            session = datman.dashboard.get_session(self.ident)
-        except DashboardException:
-            return False
-        except ParseException:
-            logger.error(
-                f"Session name {self.ident} is not datman format. Ignoring.")
-            return True
-
-        if not session:
-            return False
-
-        if not session.tech_notes and session.expects_notes():
-            return False
-
-        for name in self.names:
-            try:
-                scan = datman.dashboard.get_scan(name)
-            except DashboardException:
-                return False
-            except ParseException:
-                logger.error(
-                    f"Scan name {name} is not datman format. Ignoring.")
-                continue
-
-            if not scan:
-                return False
-
-            if self.errors_outdated(scan, name):
-                return False
-
-        return True
-
-    @classmethod
-    def get_output_dir(cls, session):
-        return None
-
-    def needs_raw_data(self):
-        return False
-
-    def make_session(self):
-        """Add the current session to datman's QC database.
-
-        Returns:
-            :obj:`dashboard.models.Session`: The created scan session or None.
-        """
-        logger.debug(f"Adding session {str(self.ident)} to dashboard.")
-        try:
-            session = datman.dashboard.get_session(self.ident, create=True)
-        except datman.dashboard.DashboardException as exc:
-            logger.error(f"Failed adding session {str(self.ident)} to "
-                         f"database. Reason: {exc}")
-            return None
-
-        self._set_alt_ids(session)
-        self._set_date(session)
-
-        return session
-
-    def _set_alt_ids(self, session):
-        """Add alternate ID formats for the scan session to the database.
-
-        Args:
-            session (:obj:`dashboard.models.Session`): A valid QC dashboard
-                scan session.
-        """
-        session.timepoint.bids_name = self.ident.get_bids_name()
-        session.timepoint.bids_session = self.ident.timepoint
-        session.save()
-
-        if not isinstance(self.ident, KCNIIdentifier):
-            return
-
-        session.timepoint.kcni_name = self.ident.get_xnat_subject_id()
-        session.kcni_name = self.ident.get_xnat_experiment_id()
-        session.save()
-        return
-
-    def _set_date(self, session):
-        """Add the scan date for a scan session to the QC database.
-
-        Args:
-            session (:obj:`dashboard.models.Session`): A valid QC dashboard
-                scan session.
-        """
-        if not self.date:
-            logger.debug(f"No scan date found for {str(self.ident)}, "
-                         "leaving blank.")
-            return
-
-        try:
-            date = datetime.strptime(self.date, '%Y-%m-%d')
-        except ValueError:
-            logger.error(f"Invalid scan date {self.date} for session "
-                         f"{str(self.ident)}")
-            return
-
-        if date == session.date:
-            return
-
-        session.date = date
-        session.save()
-
-    def add_tech_notes(self, session):
-        """Add the path to a scan session's tech notes to the database.
-
-        Args:
-            session (:obj:`dashboard.models.Session`): A valid QC dashboard
-                scan session.
-        """
-        notes = find_tech_notes(self.resources_path)
-        if not notes:
-            logger.debug(f"No tech notes found in {self.resources_path}")
-            return
-
-        # Store only the path relative to the resources dir
-        session.tech_notes = notes.replace(
-            self.study_resource_path, "").lstrip("/")
-        session.save()
-
-    def make_scan(self, file_stem):
-        """Add a single scan to datman's QC dashboard.
-
-        Args:
-            file_stem (:obj:`str`): A valid datman-style file name.
-        """
-        logger.debug(f"Adding scan {file_stem} to dashboard.")
-        try:
-            scan = datman.dashboard.get_scan(file_stem, create=True)
-        except datman.dashboard.DashboardException as exc:
-            logger.error(f"Failed adding scan {file_stem} to dashboard "
-                         f"with error: {exc}")
-            return
-        if self.experiment.is_shared():
-            source_session = self._get_source_session()
-            self._make_linked(scan, source_session)
-        self._add_bids_scan_name(scan, file_stem)
-        self._add_side_car(scan, file_stem)
-        self._update_conversion_errors(scan, file_stem)
-
-    def _make_linked(self, scan, source_session):
-        try:
-            source_session = datman.dashboard.get_session(source_session)
-        except datman.dashboard.DashboardException as exc:
-            logger.error(
-                f"Failed to link shared scan {scan} to source "
-                f"{source_session}. Reason - {exc}"
-            )
-            return
-        matches = [
-            source_scan for source_scan in source_session.scans
-            if (source_scan.series == scan.series and
-                source_scan.tag == scan.tag)
-        ]
-        if not matches or len(matches) > 1:
-            logger.error(
-                f"Failed to link shared scan {scan} to {source_session}."
-                " Reason - Unable to find source scan database record."
-            )
-            return
-
-        scan.source_id = matches[0].id
-        scan.save()
-
-    def _get_source_session(self):
-        """Get the ID of the source experiment for a shared XNATExperiment."""
-        try:
-            config = datman.config.config(study=self.experiment.source_name)
-        except ConfigException:
-            return self.experiment.source_name
-
-        try:
-            id_map = config.get_key('IdMap')
-        except UndefinedSetting:
-            return self.experiment.source_name
-
-        return str(datman.scanid.parse(self.experiment.source_name, id_map))
-
-    def _add_bids_scan_name(self, scan, dm_stem):
-        """Add a bids format file name to a series in the QC database.
-
-        Args:
-            scan (:obj:`dashboard.models.Scan`): A QC dashboard scan.
-            dm_stem (:obj:`str`): A valid bids format scan name, or an
-                empty string if the update should be skipped.
-        """
-        bids_stem = self.names[dm_stem]
-        if not bids_stem:
-            return
-
-        try:
-            bids_ident = parse_bids_filename(bids_stem)
-        except ParseException:
-            logger.debug(f"Failed to parse bids file name {bids_stem}")
-            return
-        scan.add_bids(str(bids_ident))
-
-    def _add_side_car(self, scan, file_stem):
-        """Add the JSON side car contents to the QC database.
-
-        Args:
-            scan (:obj:`dashboard.models.Scan`): A QC dashboard scan.
-            file_stem (:obj:`str`): A valid datman-style file name. Used to
-                find the json side car file.
-        """
-        nii_file = self._get_file(file_stem, ".nii.gz")
-        if not nii_file:
-            # File exists on xnat but hasnt been generated.
-            return
-
-        side_car = self._get_file(file_stem, ".json")
-        if not side_car:
-            logger.error(f"Missing json side car for {file_stem}")
-            return
-
-        try:
-            scan.add_json(side_car)
-        except Exception as exc:
-            logger.error("Failed to add JSON side car to dashboard "
-                         f"record for {side_car}. Reason - {exc}")
-
-    def _update_conversion_errors(self, scan, file_stem):
-        """Add any dcm2niix conversion errors to the QC database.
-
-        Args:
-            scan (:obj:`dashboard.models.Scan`): A QC dashboard scan.
-            file_stem (:obj:`str`): A valid datman style file name. Used to
-                find the conversion error file (if one exists).
-        """
-        convert_errors = self._get_file(file_stem, ".err")
-        if not convert_errors:
-            if scan.conv_errors:
-                # Erase the error message from the DB, because it
-                # has been resolved.
-                scan.add_error(None)
-            return
-        message = self._read_file(convert_errors)
-        scan.add_error(message)
-
-    def _get_file(self, fname, ext):
-        """Find a file on the file system.
-
-        Args:
-            fname (:obj:`str`): A file name (minus extension).
-            ext (:obj:`str`): A file extension.
-
-        Returns:
-            str: The full path to the file matching the given name and
-                extension, otherwise None.
-        """
-        found = os.path.join(self.nii_path, fname + ext)
-        if not os.path.exists(found):
-            bl_found = os.path.join(self.nii_path, 'blacklisted', fname + ext)
-            if os.path.exists(bl_found):
-                return bl_found
-            logger.debug(f"File not found {found}")
-            return None
-        return found
-
-    def _read_file(self, fpath):
-        """Read the contents of a file.
-
-        Args:
-            fpath (:obj:`str`): The full path to a file.
-
-        Returns:
-            str: The contents of the file or None if the file cannot be read.
-        """
-        try:
-            with open(fpath, "r") as file_handle:
-                message = file_handle.readlines()
-        except Exception as exc:
-            logger.debug(f"Can't read file {fpath} - {exc}")
-            return None
-        return message
-
-    def errors_outdated(self, scan, fname):
-        err_file = self._get_file(fname, ".err")
-        if not err_file and scan.conv_errors:
-            # Error is resolved, but still appears in database
-            return True
-        if err_file and not scan.conv_errors:
-            # Error has appeared, but isnt recorded in database
-            return True
-        if err_file and scan.conv_errors:
-            # Error exists in both locations, but may have changed
-            message = self._read_file(err_file)
-            if isinstance(message, list):
-                message = "\n".join(message)
-            return message != scan.conv_errors
-        return False
-
-
-class NiiExporter(SeriesExporter):
-    """Export a series to nifti format with datman-style names.
-    """
-
-    ext = ".nii.gz"
-
-    type = "nii"
-
-    def export(self, raw_data_dir, **kwargs):
-        if self.dry_run:
-            logger.info(f"Dry run: Skipping export of {self.fname_root}")
-            return
-
-        if self.outputs_exist():
-            logger.debug(f"Outputs exist for {self.fname_root}, skipping.")
-            return
-
-        self.make_output_dir()
-
-        with make_temp_directory(prefix="export_nifti_") as tmp:
-            _, log_msgs = run(f'dcm2niix -z y -b y -o {tmp} {raw_data_dir}',
-                              self.dry_run)
-            for tmp_file in glob(f"{tmp}/*"):
-                self.move_file(tmp_file)
-                stem = self._get_fname(tmp_file)
-                self.report_issues(stem, str(log_msgs))
-
-    def move_file(self, gen_file):
-        """Move the temp outputs of dcm2niix to the intended output directory.
-
-        Args:
-            gen_file (:obj:`str`): The full path to the generated nifti file
-                to move.
-        """
-        fname = self._get_fname(gen_file)
-
-        if not fname:
-            return
-
-        out_file = os.path.join(self.output_dir, fname)
-        if os.path.exists(out_file):
-            logger.info(f"Output {out_file} already exists. Skipping.")
-            return
-
-        return_code, _ = run(f"mv {gen_file} {out_file}", self.dry_run)
-        if return_code:
-            logger.debug(f"Moving dcm2niix output {gen_file} to {out_file} "
-                         "has failed.")
-
-    def _get_fname(self, gen_file):
-        """Get the intended datman-style name for a generated file.
-
-        Args:
-            gen_file (:obj:`str`): The full path to the generated nifti file
-                to move.
-
-        Result:
-            str: A string filename (with extension) or an empty string.
-        """
-        ext = get_extension(gen_file)
-        bname = os.path.basename(gen_file)
-
-        if self.echo_dict:
-            stem = self._get_echo_fname(bname, ext)
-            if stem != self.fname_root:
-                # File belongs to the wrong echo, skip it
-                return ""
-        else:
-            stem = self.fname_root
-        return stem + ext
-
-    def _get_echo_fname(self, fname, ext):
-        """Get a valid datman-style file name from a multiecho file.
-
-        Args:
-            fname (:obj:`str`): A filename to parse for an echo number.
-            ext (:obj:`str`): The file extension to use.
-
-        Returns:
-            str: A valid datman-style file name or an empty string if one
-                cannot be made.
-        """
-        # Match a 14 digit timestamp and 1-3 digit series num
-        regex = "files_(.*)_([0-9]{14})_([0-9]{1,3})(.*)?" + ext
-        match = re.search(regex, fname)
-
-        if not match:
-            logger.error(f"Can't parse valid echo number from {fname}.")
-            return ""
-
-        try:
-            echo = int(match.group(4).split('e')[-1][0])
-            stem = self.echo_dict[echo]
-        except Exception:
-            logger.error(f"Can't parse valid echo number from {fname}")
-            return ""
-
-        return stem
-
-    def report_issues(self, stem, messages):
-        """Write an error log if dcm2niix had errors during conversion.
-
-        Args:
-            stem (:obj:`stem`): A valid datman-style file name (minus
-                extension).
-            messages (:obj:`str`): Error messages to write.
-        """
-        if self.dry_run:
-            logger.info(f"DRYRUN - Skipping write of error log for {stem}")
-            return
-
-        if 'missing images' not in messages:
-            # The only issue we care about currently is if files are missing
-            return
-
-        dest = os.path.join(self.output_dir, stem) + ".err"
-        self._write_error_log(dest, messages)
-
-    def _write_error_log(self, dest, messages):
-        """Write an error message to the file system.
-
-        Args:
-            dest (:obj:`str`): The full path of the file to write.
-            messages (:obj:`str`): Intended contents of the error log.
-        """
-        try:
-            with open(dest, "w") as output:
-                output.write(messages)
-        except Exception as exc:
-            logger.error(f"Failed writing dcm2niix errors to {dest}. "
-                         f"Reason - {type(exc).__name__} {exc} ")
-
-
-class DcmExporter(SeriesExporter):
-    """Export a single dicom from a scan.
-    """
-
-    type = "dcm"
-    ext = ".dcm"
-
-    def export(self, raw_data_dir, **kwargs):
-        self.make_output_dir()
-
-        if self.echo_dict:
-            self._export_multi_echo(raw_data_dir)
-            return
-
-        dcm_file = self._find_dcm(raw_data_dir)
-        if not dcm_file:
-            logger.error(f"No dicom files found in {raw_data_dir}")
-            return
-
-        logger.debug(f"Exporting a dcm file from {raw_data_dir} to "
-                     f"{self.output_dir}")
-        output = os.path.join(self.output_dir, self.fname_root + self.ext)
-        run(f"cp {dcm_file} {output}", self.dry_run)
-
-    def _find_dcm(self, raw_data_dir):
-        """Find the path to a valid dicom in the given directory.
-
-        Args:
-            raw_data_dir (:obj:`str`): The full path to the directory where
-                raw dicoms were downloaded for the series.
-
-        Returns:
-            str: the full path to the first readable dicom found.
-        """
-        for path in glob(f"{raw_data_dir}/*"):
-            try:
-                dicom.read_file(path)
-            except dicom.filereader.InvalidDicomError:
-                pass
-            else:
-                return path
-        return ""
-
-    def _export_multi_echo(self, raw_data_dir):
-        """Find a single valid dicom for each echo in a multiecho scan.
-
-        Args:
-            raw_data_dir (:obj:`str`): The full path to the directory where
-                raw dicoms were downloaded for the series.
-        """
-        dcm_dict = {}
-        for path in glob(f"{raw_data_dir}/*"):
-            try:
-                dcm_file = dicom.read_file(path)
-            except dicom.filereader.InvalidDicomError:
-                continue
-            dcm_echo_num = dcm_file.EchoNumbers
-            if dcm_echo_num not in dcm_dict:
-                dcm_dict[int(dcm_echo_num)] = path
-            if len(dcm_dict) == len(self.echo_dict):
-                break
-
-        for echo_num, dcm_echo_num in zip(self.echo_dict.keys(),
-                                          dcm_dict.keys()):
-            output_file = os.path.join(self.output_dir,
-                                       self.echo_dict[echo_num] + self.ext)
-            logger.debug(f"Exporting a dcm file from {raw_data_dir} to "
-                         f"{output_file}")
-            cmd = f"cp {dcm_dict[dcm_echo_num]} {output_file}"
-            run(cmd, self.dry_run)
-
-
-SESSION_EXPORTERS = {
-    exp.type: exp for exp in SessionExporter.__subclasses__()
-}
-
-SERIES_EXPORTERS = {
-    exp.type: exp for exp in SeriesExporter.__subclasses__()
-}
-
-
 class FakeSidecar(dcm2bids.Sidecar):
     """Turns XNAT series descriptions into pseudo-sidecars.
     """
diff --git a/datman/exporters/dashboard.py b/datman/exporters/dashboard.py
new file mode 100644
index 00000000..86cdd290
--- /dev/null
+++ b/datman/exporters/dashboard.py
@@ -0,0 +1,399 @@
+"""An exporter to push raw datman files into the QC dashboard.
+"""
+from datetime import datetime
+import logging
+import os
+
+from .base import SessionExporter
+import datman.config
+import datman.dashboard
+from datman.exceptions import (ConfigException, DashboardException,
+                               UndefinedSetting)
+from datman.scanid import (KCNIIdentifier, parse, parse_bids_filename,
+                           ParseException)
+from datman.utils import find_tech_notes, get_extension
+
+logger = logging.getLogger(__name__)
+
+__all__ = ["DBExporter"]
+
+
+class DBExporter(SessionExporter):
+    """Add a datman-style session and its contents to datman's QC dashboard.
+    """
+
+    type = "db"
+
+    def __init__(self, config, session, experiment, **kwargs):
+        """Collect the paths and IDs needed to update the dashboard.
+
+        Resource paths become "" if the config has no 'resources' path.
+        """
+        ident = session._ident
+        try:
+            study_dir = config.get_path("resources")
+        except UndefinedSetting:
+            study_dir = session_dir = ""
+        else:
+            session_dir = os.path.join(
+                study_dir, ident.get_full_subjectid_with_timepoint_session())
+
+        self.nii_path = session.nii_path
+        self.output_dir = None
+        self.ident = ident
+        self.study_resource_path = study_dir
+        self.resources_path = session_dir
+        self.date = experiment.date
+        super().__init__(config, session, experiment, **kwargs)
+
+    @property
+    def names(self):
+        """Map each datman-style scan name in the session to a bids name.
+
+        Returns:
+            :obj:`dict`: Datman-style scan names mapped to the matching
+                bids-style name, or to an empty string when no bids
+                name can be found.
+        """
+        found = {}
+        # Start from experiment.scans so that scans which failed to export
+        # are still reported to the dashboard.
+        for scan in self.experiment.scans:
+            for dm_name in scan.names:
+                found[dm_name] = self.get_bids_name(dm_name, self.session)
+
+        # Also check the folder contents, in case symlinked scans exist
+        # that were never named on XNAT.
+        for nii in self.session.niftis:
+            stem = nii.file_name.replace(nii.ext, "")
+            if stem not in found:
+                found[stem] = self.get_bids_name(stem, self.session)
+
+        return found
+
+    def get_bids_name(self, dm_name, session):
+        """Find the bids-style name that a datman-style nifti links to.
+
+        Returns:
+            str: The bids file name (extension removed), or an empty
+                string unless the first .nii.gz match is a symlink.
+        """
+        niftis = (path for path in session.find_files(dm_name)
+                  if ".nii.gz" in path)
+        first = next(niftis, None)
+        if first is None or not os.path.islink(first):
+            return ""
+        target = os.path.basename(os.readlink(first))
+        return target.replace(get_extension(target), "")
+
+    def export(self, *args, **kwargs):
+        """Add the session, its tech notes and its scans to the dashboard."""
+        if self.dry_run:
+            logger.info("Dry run: Skipping database update for "
+                        f"{str(self.ident)}")
+            return
+        if not datman.dashboard.dash_found:
+            logger.warning("Dashboard database not found, unable to add "
+                           f"{str(self.ident)} and its contents.")
+            return
+
+        session = self.make_session()
+        if not session:
+            return  # make_session() already logged why it failed
+        if not session.tech_notes and session.expects_notes():
+            self.add_tech_notes(session)
+        for file_stem in self.names:
+            self.make_scan(file_stem)
+
+    def outputs_exist(self):
+        """Check whether the dashboard is already up to date.
+
+        Returns:
+            bool: False if the session or one of its scans needs updating.
+        """
+        try:
+            session = datman.dashboard.get_session(self.ident)
+        except DashboardException:
+            return False
+        except ParseException:
+            logger.error(
+                f"Session name {self.ident} is not datman format. Ignoring.")
+            return True
+
+        if not session or (not session.tech_notes and
+                           session.expects_notes()):
+            return False
+
+        for name in self.names:
+            try:
+                scan = datman.dashboard.get_scan(name)
+            except DashboardException:
+                return False
+            except ParseException:
+                logger.error(
+                    f"Scan name {name} is not datman format. Ignoring.")
+                continue
+            if not scan or self.errors_outdated(scan, name):
+                return False
+        return True
+
+    @classmethod
+    def get_output_dir(cls, session):
+        """Always None: the dashboard exporter has no output folder."""
+        return None
+
+    def needs_raw_data(self):
+        """Always False: this exporter does not need raw data."""
+        return False
+
+    def make_session(self):
+        """Add the current session to datman's QC database.
+
+        Returns:
+            :obj:`dashboard.models.Session`: The scan session record, or
+                None if the dashboard raised a DashboardException.
+        """
+        logger.debug(f"Adding session {str(self.ident)} to dashboard.")
+        try:
+            session = datman.dashboard.get_session(self.ident, create=True)
+        except DashboardException as exc:
+            logger.error(f"Failed adding session {str(self.ident)} to "
+                         f"database. Reason: {exc}")
+            return None
+
+        self._set_alt_ids(session)
+        self._set_date(session)
+        return session
+
+    def _set_alt_ids(self, session):
+        """Record the bids (and, if relevant, KCNI) IDs for a session.
+
+        Args:
+            session (:obj:`dashboard.models.Session`): A valid QC dashboard
+                scan session.
+        """
+        ident = self.ident
+        timepoint = session.timepoint
+        timepoint.bids_name = ident.get_bids_name()
+        timepoint.bids_session = ident.timepoint
+        session.save()
+
+        # KCNI names are only recorded for KCNI identifiers.
+        if isinstance(ident, KCNIIdentifier):
+            session.timepoint.kcni_name = ident.get_xnat_subject_id()
+            session.kcni_name = ident.get_xnat_experiment_id()
+            session.save()
+
+    def _set_date(self, session):
+        """Store the experiment's scan date on a dashboard session.
+
+        Args:
+            session (:obj:`dashboard.models.Session`): A valid QC dashboard
+                scan session.
+        """
+        if not self.date:
+            logger.debug(f"No scan date found for {str(self.ident)}, "
+                         "leaving blank.")
+            return
+
+        # Dates are expected in YYYY-MM-DD form.
+        try:
+            scan_date = datetime.strptime(self.date, '%Y-%m-%d')
+        except ValueError:
+            logger.error(f"Invalid scan date {self.date} for session "
+                         f"{str(self.ident)}")
+            return
+
+        # Avoid a needless database write when nothing has changed.
+        if scan_date != session.date:
+            session.date = scan_date
+            session.save()
+
+    def add_tech_notes(self, session):
+        """Save the location of a session's tech notes in the database.
+
+        Args:
+            session (:obj:`dashboard.models.Session`): A valid QC dashboard
+                scan session.
+        """
+        notes_path = find_tech_notes(self.resources_path)
+        if not notes_path:
+            logger.debug(f"No tech notes found in {self.resources_path}")
+            return
+
+        # Keep just the portion of the path below the study resources dir.
+        relative = notes_path.replace(self.study_resource_path, "")
+        session.tech_notes = relative.lstrip("/")
+        session.save()
+
+    def make_scan(self, file_stem):
+        """Add a single scan to datman's QC dashboard.
+
+        Args:
+            file_stem (:obj:`str`): A valid datman-style file name.
+        """
+        logger.debug(f"Adding scan {file_stem} to dashboard.")
+        try:
+            scan = datman.dashboard.get_scan(file_stem, create=True)
+        except DashboardException as exc:
+            logger.error(f"Failed adding scan {file_stem} to dashboard "
+                         f"with error: {exc}")
+            return
+        if self.experiment.is_shared():
+            # Shared scans get linked to their record in the source session.
+            self._make_linked(scan, self._get_source_session())
+        self._add_bids_scan_name(scan, file_stem)
+        self._add_side_car(scan, file_stem)
+        self._update_conversion_errors(scan, file_stem)
+
+    def _make_linked(self, scan, source_session):
+        """Link a shared scan to its database record in the source session.
+
+        Nothing is linked unless exactly one scan of the source session
+        (which may be missing) has the same series and tag.
+        """
+        try:
+            source = datman.dashboard.get_session(source_session)
+        except DashboardException as exc:
+            logger.error(
+                f"Failed to link shared scan {scan} to source "
+                f"{source_session}. Reason - {exc}")
+            return
+        # A source session that doesn't exist (None) has no scans to match.
+        matches = [item for item in (source.scans if source else [])
+                   if item.series == scan.series and item.tag == scan.tag]
+        if len(matches) != 1:
+            logger.error(
+                f"Failed to link shared scan {scan} to {source_session}."
+                " Reason - Unable to find source scan database record.")
+            return
+        scan.source_id = matches[0].id
+        scan.save()
+
+    def _get_source_session(self):
+        """Get the ID of the source experiment for a shared XNATExperiment."""
+        source = self.experiment.source_name
+        try:
+            config = datman.config.config(study=source)
+        except ConfigException:
+            return source
+        try:
+            id_map = config.get_key('IdMap')
+        except UndefinedSetting:
+            # No ID translation is configured; use the name unchanged.
+            return source
+        return str(parse(source, id_map))
+
+    def _add_bids_scan_name(self, scan, dm_stem):
+        """Add a bids format file name to a series in the QC database.
+
+        Args:
+            scan (:obj:`dashboard.models.Scan`): A QC dashboard scan.
+            dm_stem (:obj:`str`): A valid datman-style file name. The
+                update is skipped if it has no matching bids name.
+        """
+        # Look up this one name directly: self.names rebuilds every entry.
+        bids_stem = self.get_bids_name(dm_stem, self.session)
+        if not bids_stem:
+            return
+        try:
+            bids_ident = parse_bids_filename(bids_stem)
+        except ParseException:
+            logger.debug(f"Failed to parse bids file name {bids_stem}")
+            return
+        scan.add_bids(str(bids_ident))
+
+    def _add_side_car(self, scan, file_stem):
+        """Load a scan's JSON side car into its QC database record.
+
+        Args:
+            scan (:obj:`dashboard.models.Scan`): A QC dashboard scan.
+            file_stem (:obj:`str`): A valid datman-style file name. Used to
+                locate the nifti and its json side car.
+        """
+        if not self._get_file(file_stem, ".nii.gz"):
+            # The scan exists on XNAT but its nifti hasn't been generated.
+            return
+
+        json_path = self._get_file(file_stem, ".json")
+        if not json_path:
+            logger.error(f"Missing json side car for {file_stem}")
+            return
+
+        # Best effort: a bad side car is logged instead of halting export.
+        try:
+            scan.add_json(json_path)
+        except Exception as exc:
+            logger.error("Failed to add JSON side car to dashboard "
+                         f"record for {json_path}. Reason - {exc}")
+
+    def _update_conversion_errors(self, scan, file_stem):
+        """Sync a scan's dcm2niix conversion errors with the QC database.
+
+        Args:
+            scan (:obj:`dashboard.models.Scan`): A QC dashboard scan.
+            file_stem (:obj:`str`): A valid datman style file name. Used to
+                find the conversion error file (if one exists).
+        """
+        err_file = self._get_file(file_stem, ".err")
+        if err_file:
+            scan.add_error(self._read_file(err_file))
+            return
+
+        if scan.conv_errors:
+            # The error file is gone, so the problem has been resolved
+            # and the stale message can be erased from the database.
+            scan.add_error(None)
+
+    def _get_file(self, fname, ext):
+        """Find a file in the nifti folder or its 'blacklisted' subfolder.
+
+        Args:
+            fname (:obj:`str`): A file name (minus extension).
+            ext (:obj:`str`): A file extension.
+
+        Returns:
+            str: The full path to the file matching the given name and
+                extension, otherwise None.
+        """
+        main_path = os.path.join(self.nii_path, fname + ext)
+        if os.path.exists(main_path):
+            return main_path
+        bl_path = os.path.join(self.nii_path, 'blacklisted', fname + ext)
+        if os.path.exists(bl_path):
+            return bl_path
+        logger.debug(f"File not found {main_path}")
+        return None
+
+    def _read_file(self, fpath):
+        """Read the contents of a file.
+
+        Args:
+            fpath (:obj:`str`): The full path to a file.
+
+        Returns:
+            list: The lines of the file, or None if the file cannot be read.
+        """
+        try:
+            with open(fpath, "r") as file_handle:
+                message = file_handle.readlines()
+        except Exception as exc:
+            logger.debug(f"Can't read file {fpath} - {exc}")
+            return None
+        return message
+
+    def errors_outdated(self, scan, fname):
+        """Check if a scan's conversion errors differ from the database.
+
+        Compares the scan's .err file (if any) to its recorded errors.
+        """
+        err_file = self._get_file(fname, ".err")
+        recorded = scan.conv_errors
+        if not err_file or not recorded:
+            # Outdated only if exactly one of the file and the record exists.
+            return bool(err_file) != bool(recorded)
+        # Error exists in both locations, but may have changed
+        message = self._read_file(err_file)
+        if isinstance(message, list):
+            message = "\n".join(message)
+        return message != recorded
diff --git a/datman/exporters/legacy.py b/datman/exporters/legacy.py
new file mode 100644
index 00000000..977e64f0
--- /dev/null
+++ b/datman/exporters/legacy.py
@@ -0,0 +1,225 @@
+"""Classes for the old-style datman exporters.
+
+These classes allow a single scan to be exported to various file formats with
+the datman naming scheme. They were datman's only export methods prior
+to 2020ish, but have been phased out in favor of using exporters that use
+the bids format.
+"""
+from glob import glob
+import logging
+import os
+import re
+
+import pydicom as dicom
+
+from .base import SeriesExporter
+from datman.utils import run, make_temp_directory, get_extension
+
+logger = logging.getLogger(__name__)
+
+__all__ = ["NiiExporter", "DcmExporter"]
+
+
+class NiiExporter(SeriesExporter):
+    """Export a series to nifti format with datman-style names.
+    """
+
+    ext = ".nii.gz"
+
+    type = "nii"
+
+    def export(self, raw_data_dir, **kwargs):
+        """Convert a series to nifti with dcm2niix and store the outputs."""
+        if self.dry_run:
+            logger.info(f"Dry run: Skipping export of {self.fname_root}")
+            return
+        if self.outputs_exist():
+            logger.debug(f"Outputs exist for {self.fname_root}, skipping.")
+            return
+
+        self.make_output_dir()
+        with make_temp_directory(prefix="export_nifti_") as tmp:
+            _, log_msgs = run(f'dcm2niix -z y -b y -o {tmp} {raw_data_dir}',
+                              self.dry_run)
+            for tmp_file in glob(f"{tmp}/*"):
+                self.move_file(tmp_file)
+                if self._get_fname(tmp_file):
+                    # report_issues expects the stem without an extension.
+                    self.report_issues(self.fname_root, str(log_msgs))
+
+    def move_file(self, gen_file):
+        """Move one temp output of dcm2niix into the output directory.
+
+        Args:
+            gen_file (:obj:`str`): The full path to the generated file
+                to move.
+        """
+        dest_name = self._get_fname(gen_file)
+        if not dest_name:
+            # An empty name marks a file this exporter should not keep.
+            return
+
+        out_file = os.path.join(self.output_dir, dest_name)
+        if os.path.exists(out_file):
+            logger.info(f"Output {out_file} already exists. Skipping.")
+            return
+
+        status, _ = run(f"mv {gen_file} {out_file}", self.dry_run)
+        if status:
+            logger.debug(f"Moving dcm2niix output {gen_file} to {out_file} "
+                         "has failed.")
+
+    def _get_fname(self, gen_file):
+        """Work out the datman-style name a generated file should get.
+
+        Args:
+            gen_file (:obj:`str`): The full path to the generated nifti file
+                to move.
+
+        Returns:
+            str: A string filename (with extension) or an empty string.
+        """
+        ext = get_extension(gen_file)
+        # Without echoes every output is named after the series itself.
+        if not self.echo_dict:
+            return self.fname_root + ext
+
+        bname = os.path.basename(gen_file)
+        stem = self._get_echo_fname(bname, ext)
+        if stem != self.fname_root:
+            # File belongs to the wrong echo, skip it
+            return ""
+        return stem + ext
+
+    def _get_echo_fname(self, fname, ext):
+        """Get a valid datman-style file name from a multiecho file.
+
+        Args:
+            fname (:obj:`str`): A filename to parse for an echo number.
+            ext (:obj:`str`): The file extension to use.
+
+        Returns:
+            str: A valid datman-style file name or an empty string if one
+                cannot be made.
+        """
+        # Match a 14 digit timestamp and 1-3 digit series num. The extension
+        # is escaped so that its dots only match literal dots.
+        regex = r"files_(.*)_([0-9]{14})_([0-9]{1,3})(.*)?" + re.escape(ext)
+        match = re.search(regex, fname)
+        if not match:
+            logger.error(f"Can't parse valid echo number from {fname}.")
+            return ""
+
+        try:
+            echo = int(match.group(4).split('e')[-1][0])
+            stem = self.echo_dict[echo]
+        except Exception:
+            logger.error(f"Can't parse valid echo number from {fname}")
+            return ""
+
+        return stem
+
+    def report_issues(self, stem, messages):
+        """Write an error log if dcm2niix reported missing images.
+
+        The log is saved in the output directory as '<stem>.err'.
+
+        Args:
+            stem (:obj:`str`): A valid datman-style file name (minus
+                extension).
+            messages (:obj:`str`): Error messages to write.
+        """
+        if self.dry_run:
+            logger.info(f"DRYRUN - Skipping write of error log for {stem}")
+            return
+
+        # The only issue we care about currently is if files are missing
+        if 'missing images' in messages:
+            dest = os.path.join(self.output_dir, stem) + ".err"
+            self._write_error_log(dest, messages)
+
+    def _write_error_log(self, dest, messages):
+        """Save error messages to a file, logging (not raising) on failure.
+
+        Args:
+            dest (:obj:`str`): The full path of the file to write.
+            messages (:obj:`str`): Intended contents of the error log.
+        """
+        try:
+            with open(dest, "w") as log_file:
+                log_file.write(messages)
+        except Exception as err:
+            logger.error(f"Failed writing dcm2niix errors to {dest}. "
+                         f"Reason - {type(err).__name__} {err} ")
+
+
+class DcmExporter(SeriesExporter):
+    """Export a single dicom from a scan.
+    """
+
+    type = "dcm"
+    ext = ".dcm"
+
+    def export(self, raw_data_dir, **kwargs):
+        """Copy one representative dicom (per echo) to the output folder."""
+        self.make_output_dir()
+        if self.echo_dict:
+            self._export_multi_echo(raw_data_dir)
+            return
+
+        source = self._find_dcm(raw_data_dir)
+        if not source:
+            logger.error(f"No dicom files found in {raw_data_dir}")
+            return
+
+        logger.debug(f"Exporting a dcm file from {raw_data_dir} to "
+                     f"{self.output_dir}")
+        target = os.path.join(self.output_dir, self.fname_root + self.ext)
+        run(f"cp {source} {target}", self.dry_run)
+
+    def _find_dcm(self, raw_data_dir):
+        """Find the path to a valid dicom in the given directory.
+
+        Args:
+            raw_data_dir (:obj:`str`): The full path to the directory where
+                raw dicoms were downloaded for the series.
+
+        Returns:
+            str: The full path to the first readable dicom found, or an
+                empty string if there is none.
+        """
+        for path in glob(f"{raw_data_dir}/*"):
+            try:
+                dicom.dcmread(path)
+            except dicom.filereader.InvalidDicomError:
+                continue
+            return path
+        return ""
+
+    def _export_multi_echo(self, raw_data_dir):
+        """Find a single valid dicom for each echo in a multiecho scan.
+
+        Args:
+            raw_data_dir (:obj:`str`): The full path to the directory where
+                raw dicoms were downloaded for the series.
+        """
+        dcm_dict = {}
+        for path in glob(f"{raw_data_dir}/*"):
+            try:
+                dcm_file = dicom.dcmread(path)
+            except dicom.filereader.InvalidDicomError:
+                continue
+            dcm_dict.setdefault(int(dcm_file.EchoNumbers), path)
+            if len(dcm_dict) == len(self.echo_dict):
+                break
+
+        # Pair names and dicoms by echo number, not by discovery order.
+        for echo_num, stem in self.echo_dict.items():
+            if echo_num not in dcm_dict:
+                logger.error(f"No dicom found for echo {echo_num} in "
+                             f"{raw_data_dir}")
+                continue
+            output_file = os.path.join(self.output_dir, stem + self.ext)
+            logger.debug(f"Exporting a dcm file from {raw_data_dir} to "
+                         f"{output_file}")
+            run(f"cp {dcm_dict[echo_num]} {output_file}", self.dry_run)

From b73e2c88bf9030f9143fc05fc6551653d5794d65 Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Mon, 28 Jul 2025 18:03:07 -0400
Subject: [PATCH 37/45] [REF] Move BidsOptions, accept arbitrary dcm2bids
 options

- BidsOptions class is now specific to each bids exporter module
- dm_xnat_extract will more flexibly handle options for dcm2bids
	and other future wrapped tools.
---
 bin/dm_xnat_extract.py          | 108 +++++++++++++--------------
 datman/exporters/bids.py        | 126 ++++++++++++++++++++++++++++++++
 datman/exporters/bids_legacy.py |  53 +++++++++++++-
 3 files changed, 228 insertions(+), 59 deletions(-)
 create mode 100644 datman/exporters/bids.py

diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py
index 95e5cd45..9ed2ad9d 100755
--- a/bin/dm_xnat_extract.py
+++ b/bin/dm_xnat_extract.py
@@ -59,74 +59,26 @@
 import datman.scanid
 import datman.xnat
 from datman.utils import (validate_subject_id, define_folder,
-                          make_temp_directory, locate_metadata, read_blacklist)
+                          make_temp_directory, read_blacklist)
 
 logger = logging.getLogger(os.path.basename(__file__))
 
 
-class BidsOptions:
-    """Helper class for options related to exporting to BIDS format.
-    """
-
-    def __init__(self, config, keep_dcm=False, bids_out=None,
-                 force_dcm2niix=False, clobber=False, dcm2bids_config=None,
-                 log_level="INFO", refresh=False):
-        self.keep_dcm = keep_dcm
-        self.force_dcm2niix = force_dcm2niix
-        self.clobber = clobber
-        self.refresh = refresh
-        self.bids_out = bids_out
-        self.log_level = log_level
-        self.dcm2bids_config = self.get_bids_config(
-            config, bids_conf=dcm2bids_config)
-
-    def get_bids_config(self, config, bids_conf=None):
-        """Find the path to a valid dcm2bids config file.
-
-        Args:
-            config (:obj:`datman.config.config`): The datman configuration.
-            bids_conf (:obj:`str`, optional): The user provided path to
-                the config file. Defaults to None.
-
-        Raises:
-            datman.exceptions.MetadataException if a valid file cannot
-                be found.
-
-        Returns:
-            str: The full path to a dcm2bids config file.
-        """
-        if bids_conf:
-            path = bids_conf
-        else:
-            try:
-                path = locate_metadata("dcm2bids.json", config=config)
-            except FileNotFoundError as exc:
-                raise datman.exceptions.MetadataException(
-                    "No dcm2bids.json config file available for "
-                    f"{config.study_name}") from exc
-
-        if not os.path.exists(path):
-            raise datman.exceptions.MetadataException(
-                "No dcm2bids.json settings provided.")
-
-        return path
-
-
 def main():
-    args = read_args()
+    args, tool_opts = read_args()
 
     log_level = get_log_level(args)
     configure_logging(args.study, log_level)
 
     if args.use_dcm2bids and not datman.exporters.DCM2BIDS_FOUND:
-        logger.error("Failed to import Dcm2Bids. Ensure that "
+        logger.error("Failed to locate Dcm2Bids. Ensure that "
                      "Dcm2Bids is installed when using the "
                      "--use-dcm2bids flag.  Exiting conversion")
         return
 
     config = datman.config.config(study=args.study)
     if args.use_dcm2bids:
-        bids_opts = BidsOptions(
+        bids_opts = datman.exporters.BidsOptions(
             config,
             keep_dcm=args.keep_dcm,
             force_dcm2niix=args.force_dcm2niix,
@@ -134,7 +86,8 @@ def main():
             dcm2bids_config=args.dcm_config,
             bids_out=args.bids_out,
             log_level=log_level,
-            refresh=args.refresh
+            refresh=args.refresh,
+            extra_opts=tool_opts.get('--dcm2bids-')
         )
     else:
         bids_opts = None
@@ -236,7 +189,12 @@ def _is_file(path, parser):
     )
 
     g_dcm2bids = parser.add_argument_group(
-        "Options for using dcm2bids"
+        "Options for using dcm2bids. Note that you can feed options directly "
+        "to dcm2bids by prefixing any with '--dcm2bids-'. For example, the "
+        "dcm2bids option 'auto-extract-entities' can be used with "
+        "'--dcm2bids-auto-extract-entities'. Note that the spelling and case "
+        "must match exactly what dcm2bids expects to receive and must exist "
+        "for the version of dcm2bids in use"
     )
     g_dcm2bids.add_argument(
         "--bids-out", action="store", metavar="DIR",
@@ -289,14 +247,50 @@ def _is_file(path, parser):
         help="Do nothing"
     )
 
-    args = parser.parse_args()
+    tool_opts, clean_args = parse_tool_opts(sys.argv[1:], ['--dcm2bids-'])
+    args = parser.parse_args(clean_args)
 
     bids_opts = [args.keep_dcm, args.dcm_config, args.bids_out,
                  args.force_dcm2niix, args.clobber, args.refresh]
-    if not args.use_dcm2bids and any(bids_opts):
+    if not args.use_dcm2bids and (any(bids_opts) or
+                                  '--dcm2bids-' in tool_opts):
         parser.error("dcm2bids configuration requires --use-dcm2bids")
 
-    return args
+    return args, tool_opts
+
+
+def parse_tool_opts(
+        args: list[str],
+        accepted_prefixes: list[str]
+    ) -> tuple[dict[str, list[str]], list[str]]:
+    """Collect user options intended for wrapped tools.
+
+    Args:
+        args (list[str]): A list of string inputs to process.
+        accepted_prefixes (list[str]): a list of prefixes for options that
+            will be accepted.
+
+    Returns:
+        tuple[dict[str, list[str]], list[str]]:
+            A tuple containing:
+                - A dictionary mapping each matched prefix to the list of
+                    arguments that began with it (prefix stripped).
+                - A list of every other argument, in the original order
+                    (including any separate value token after an option).
+    """
+    extra_opts = {}
+    clean_args = []
+    for arg in args:
+        found = False
+        for prefix in accepted_prefixes:
+            if arg.startswith(prefix):
+                found = True
+                opt = arg[len(prefix):]
+                # Sliced (not split) so a repeated prefix can't break it.
+                extra_opts.setdefault(prefix, []).append(opt)
+        if not found:
+            clean_args.append(arg)
+    return extra_opts, clean_args
 
 
 def get_log_level(args):
diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py
new file mode 100644
index 00000000..5e6dc900
--- /dev/null
+++ b/datman/exporters/bids.py
@@ -0,0 +1,126 @@
+"""Export to bids format when using containerized dcm2bids (or versions >=3)
+"""
+import os
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+
+import datman.config
+from .base import SessionExporter
+from datman.exceptions import MetadataException
+from datman.utils import locate_metadata
+
+logger = logging.getLogger(__name__)
+
+__all__ = ["BidsExporter", "BidsOptions"]
+
+
+@dataclass
+class BidsOptions:
+    """Options for exporting to BIDS format. The dcm2bids_config path is
+    validated/located on creation and may raise MetadataException."""
+    dm_config: datman.config.config
+    keep_dcm: bool = False
+    bids_out: str | None = None
+    force_dcm2niix: bool = False
+    clobber: bool = False
+    dcm2bids_config: str | None = None
+    log_level: str = "INFO"
+    refresh: bool = False
+    extra_opts: list[str] | None = None
+
+    def __post_init__(self) -> None:
+        self.dcm2bids_config = self.get_bids_config(
+            self.dm_config,
+            bids_conf=self.dcm2bids_config
+        )
+
+    def get_bids_config(self, config: datman.config.config,
+                        bids_conf: str | None = None) -> str:
+        """Find the path to a valid dcm2bids config file.
+
+        Args:
+            config (:obj:`datman.config.config`): The datman configuration.
+            bids_conf (:obj:`str`, optional): The user provided path to
+                the config file. Defaults to None.
+
+        Raises:
+            datman.exceptions.MetadataException if a valid file cannot
+                be found.
+
+        Returns:
+            str: The full path to a dcm2bids config file.
+        """
+        if bids_conf:
+            path = bids_conf
+        else:
+            try:
+                path = locate_metadata("dcm2bids.json", config=config)
+            except FileNotFoundError as exc:
+                raise MetadataException(
+                    "No dcm2bids.json config file available for "
+                    f"{config.study_name}") from exc
+
+        if not os.path.exists(path):
+            raise MetadataException("No dcm2bids.json settings provided.")
+
+        return path
+
+
+class BidsExporter(SessionExporter):
+
+    type = "bids"
+
+    def __init__(self, config, session, experiment, bids_opts=None, **kwargs):
+        self.dcm_dir = experiment.dcm_subdir
+        self.bids_sub = session._ident.get_bids_name()
+        self.bids_ses = session._ident.timepoint
+        self.repeat = session._ident.session
+        self.bids_folder = session.bids_root
+        self.bids_tmp = os.path.join(session.bids_root, "tmp_dcm2bids",
+                                     f"{session.bids_sub}_{session.bids_ses}")
+        self.output_dir = session.bids_path
+        self.keep_dcm = bids_opts.keep_dcm if bids_opts else False
+        self.force_dcm2niix = bids_opts.force_dcm2niix if bids_opts else False
+        self.clobber = bids_opts.clobber if bids_opts else False
+        self.log_level = bids_opts.log_level if bids_opts else "INFO"
+        self.dcm2bids_config = bids_opts.dcm2bids_config if bids_opts else None
+        self.refresh = bids_opts.refresh if bids_opts else False
+
+        # Can be removed if dcm2bids patches the log issue
+        self.set_log_level()
+
+        super().__init__(config, session, experiment, **kwargs)
+        return
+
+
+class NiiLinkExporter(SessionExporter):
+
+    type = "nii_link"
+    ext = ".nii.gz"
+
+    def __init__(self, config, session, experiment, **kwargs):
+        return
+
+    def get_dm_names(self):
+        """Get the datman-style scan names for an entire XNAT experiment.
+
+        Returns:
+            :obj:`dict`: A dict of series numbers matched to a list of
+                datman-style names for all scans found for the session on XNAT.
+        """
+        # Difference number 1: This will return every series, even
+        #   the ones that don't get assigned a name in the traditional
+        names = {}
+        for scan in self.experiment.scans:
+            try:
+                series = int(scan.series)
+            except ValueError:
+                # XNAT sometimes adds a string when it finds duplicate series
+                # numbers. This is an error that should be resolved on the
+                # server so these instances are safe to ignore.
+                continue
+            names.setdefault(series, []).extend(scan.names)
+        return names
+
+    # def get_bids_names(self):
diff --git a/datman/exporters/bids_legacy.py b/datman/exporters/bids_legacy.py
index ab9d06d1..1b6a5e86 100644
--- a/datman/exporters/bids_legacy.py
+++ b/datman/exporters/bids_legacy.py
@@ -17,9 +17,11 @@
 import os
 import re
 
+from datman.exceptions import MetadataException
 from datman.scanid import make_filename
 from datman.utils import (splitext, get_extension, write_json, read_json,
-                          filter_niftis, read_blacklist, get_relative_source)
+                          filter_niftis, read_blacklist, get_relative_source,
+                          locate_metadata)
 
 from dcm2bids import dcm2bids, Dcm2bids
 from dcm2bids.sidecar import Acquisition
@@ -28,7 +30,54 @@
 
 logger = logging.getLogger(__name__)
 
-__all__ = ["BidsExporter", "NiiLinkExporter"]
+__all__ = ["BidsExporter", "NiiLinkExporter", "BidsOptions"]
+
+
+class BidsOptions:
+    """Options for the legacy BIDS exporter. Keyword arguments it does not
+    name (**kwargs) are accepted but not stored or used."""
+
+    def __init__(self, config, keep_dcm=False, bids_out=None,
+                 force_dcm2niix=False, clobber=False, dcm2bids_config=None,
+                 log_level="INFO", refresh=False, **kwargs):
+        self.keep_dcm = keep_dcm
+        self.force_dcm2niix = force_dcm2niix
+        self.clobber = clobber
+        self.refresh = refresh
+        self.bids_out = bids_out
+        self.log_level = log_level
+        self.dcm2bids_config = self.get_bids_config(
+            config, bids_conf=dcm2bids_config)
+
+    def get_bids_config(self, config, bids_conf=None):
+        """Find the path to a valid dcm2bids config file.
+
+        Args:
+            config (:obj:`datman.config.config`): The datman configuration.
+            bids_conf (:obj:`str`, optional): The user provided path to
+                the config file. Defaults to None.
+
+        Raises:
+            datman.exceptions.MetadataException if a valid file cannot
+                be found.
+
+        Returns:
+            str: The full path to a dcm2bids config file.
+        """
+        if bids_conf:
+            path = bids_conf
+        else:
+            try:
+                path = locate_metadata("dcm2bids.json", config=config)
+            except FileNotFoundError as exc:
+                raise MetadataException(
+                    "No dcm2bids.json config file available for "
+                    f"{config.study_name}") from exc
+
+        if not os.path.exists(path):
+            raise MetadataException("No dcm2bids.json settings provided.")
+
+        return path
 
 
 class BidsExporter(SessionExporter):

From 4cd539965afcd54528c7bfd0d52890fefae8ffed Mon Sep 17 00:00:00 2001
From: Dawn Smith <dawn.smith@camh.ca>
Date: Thu, 31 Jul 2025 21:25:34 -0400
Subject: [PATCH 38/45] [WIP] Add some functionality back to new bids class

---
 datman/exporters/bids.py | 209 +++++++++++++++++++++++++++++++++++----
 1 file changed, 191 insertions(+), 18 deletions(-)

diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py
index 5e6dc900..6a1bbf17 100644
--- a/datman/exporters/bids.py
+++ b/datman/exporters/bids.py
@@ -2,6 +2,7 @@
 """
 import os
 import logging
+import json
 from dataclasses import dataclass
 from pathlib import Path
 
@@ -71,8 +72,8 @@ class BidsExporter(SessionExporter):
 
     type = "bids"
 
-    def __init__(self, config, session, experiment, bids_opts=None, **kwargs):
-        self.dcm_dir = experiment.dcm_subdir
+    def __init__(self, config, session, importer, bids_opts=None, **kwargs):
+        self.dcm_dir = importer.dcm_subdir
         self.bids_sub = session._ident.get_bids_name()
         self.bids_ses = session._ident.timepoint
         self.repeat = session._ident.session
@@ -87,40 +88,212 @@ def __init__(self, config, session, experiment, bids_opts=None, **kwargs):
         self.dcm2bids_config = bids_opts.dcm2bids_config if bids_opts else None
         self.refresh = bids_opts.refresh if bids_opts else False
 
-        # Can be removed if dcm2bids patches the log issue
-        self.set_log_level()
-
-        super().__init__(config, session, experiment, **kwargs)
+        super().__init__(config, session, importer, **kwargs)
         return
 
+    def outputs_exist(self):
+        """Return True only if the bids output folder exists and is complete.
+
+        Always returns False when the refresh or clobber option is set."""
+        if self.refresh:
+            logger.info(
+                f"Re-comparing existing tmp folder for {self.output_dir} "
+                "to dcm2bids config to pull missed series."
+            )
+            return False
+
+        if self.clobber:
+            logger.info(
+                f"{self.output_dir} will be overwritten due to clobber option."
+            )
+            return False
+
+        out_dir = Path(self.output_dir)
+        if not out_dir.exists():
+            return False
+
+        expected_scans = self.get_expected_scans()
+        actual_scans = self.get_actual_scans()
+        _, missing = self.check_contents(expected_scans, actual_scans)
+        if missing:
+            return False
+
+        return True
+
+    def get_contents(self):
+        outputs = {}
+
+
+
 
 class NiiLinkExporter(SessionExporter):
+    """Populates a study's nii folder with symlinks pointing to the bids dir.
+    """
 
     type = "nii_link"
     ext = ".nii.gz"
 
-    def __init__(self, config, session, experiment, **kwargs):
-        return
+    def __init__(self, config, session, importer, **kwargs):
+        self.ident = session._ident
+        self.output_dir = session.nii_path
+        self.bids_path = session.bids_path
+        self.config = config
+        self.tags = config.get_tags(site=session.site)
+
+        super().__init__(config, session, importer, **kwargs)
+
+        self.dm_names = self.get_dm_names()
+
+    @classmethod
+    def get_output_dir(cls, session):
+        return session.nii_path
+
+    def needs_raw_data(self):
+        return False
 
     def get_dm_names(self):
         """Get the datman-style scan names for an entire XNAT experiment.
 
+        This is used to
+            1) Ensure the contents of the nii folder matches what may have
+               been produced with an old-style NiiExporter
+            2) To predict if an expected scan didn't extract correctly into
+               the bids folder.
+
         Returns:
-            :obj:`dict`: A dict of series numbers matched to a list of
-                datman-style names for all scans found for the session on XNAT.
+            dict: A map of each series number to the name (or
+                names) the series would be exported under.
         """
-        # Difference number 1: This will return every series, even
-        #   the ones that don't get assigned a name in the traditional
         names = {}
         for scan in self.experiment.scans:
             try:
-                series = int(scan.series)
+                series_num = int(scan.series)
             except ValueError:
-                # XNAT sometimes adds a string when it finds duplicate series
-                # numbers. This is an error that should be resolved on the
-                # server so these instances are safe to ignore.
+                # Ignore xnat scans with non-numeric series numbers.
+                # These are often of the form MR-XX and result from duplicated
+                # uploads / errors when merging on xnat.
                 continue
-            names.setdefault(series, []).extend(scan.names)
+            names[series_num] = scan.names
         return names
 
-    # def get_bids_names(self):
+    def get_bids_sidecars(self):
+        """Get all sidecars from a BIDS session.
+
+        Returns:
+            :obj:`dict`: A map from the series number to the sidecar(s) that
+                belong to that series.
+        """
+        sidecars = {}
+        bids_folder = Path(self.bids_path)
+        for sidecar in bids_folder.rglob("*.json"):
+            try:
+                contents = sidecar.read_text(encoding="utf-8")
+            except (UnicodeDecodeError, OSError) as e:
+                logger.debug(
+                    f"Ignoring unreadable json sidecar {sidecar} - {e}"
+                )
+                continue
+
+            try:
+                data = json.loads(contents)
+            except (json.JSONDecodeError, TypeError) as e:
+                logger.debug(f"Ignoring invalid json sidecar {sidecar} - {e}")
+                continue
+
+            data["path"] = sidecar
+
+            if "SeriesNumber" not in data:
+                continue
+
+            # Need code later to handle split series (do they always
+            # prefix series number with "10"?)
+            # -> For new CALM sessions it doesnt, it just allows them to
+            #   retain the original series number (and duplicates it)
+            #   not sure if this is because of CALM or a change in dcm2niix
+            #   or a change in dcm2bids
+            try:
+                series_num = int(data["SeriesNumber"])
+            except ValueError:
+                continue
+
+            sidecars.setdefault(series_num, []).append(data)
+
+        fix_split_series(sidecars)
+
+        return sidecars
+
+
+def get_bids_sidecars(bids_path, repeat):
+    """Map series numbers to the JSON sidecars found under bids_path.
+
+    A sidecar is skipped if it is unreadable, lacks an integer "SeriesNumber"
+    or its "Repeat" differs from repeat (a missing "Repeat" counts as "01").
+    Each returned dict also holds the sidecar's location under "path".
+    """
+    bids_folder = Path(bids_path)
+    sidecars = {}
+
+    for sidecar in bids_folder.rglob("*.json"):
+        try:
+            contents = sidecar.read_text(encoding="utf-8")
+        except (UnicodeDecodeError, OSError) as e:
+            logger.debug(
+                f"Ignoring unreadable json sidecar {sidecar} - {e}"
+            )
+            continue
+
+        try:
+            data = json.loads(contents)
+        except (json.JSONDecodeError, TypeError) as e:
+            logger.debug(f"Ignoring invalid json sidecar {sidecar} - {e}")
+            continue
+
+        data["path"] = sidecar
+
+        if "SeriesNumber" not in data:
+            continue
+
+        if "Repeat" not in data:
+            if repeat == "01":
+                # Assume sidecar belongs to this session, as there's
+                # usually only 1 'repeat' anyway
+                data["Repeat"] = "01"
+            else:
+                continue
+
+        if data["Repeat"] != repeat:
+            continue
+
+        try:
+            series_num = int(data["SeriesNumber"])
+        except ValueError:
+            continue
+
+        sidecars.setdefault(series_num, []).append(data)
+
+    fix_split_series(sidecars)
+
+    return sidecars
+
+
+def fix_split_series(sidecars):
+    # Merge legacy dcm2bids/dcm2niix split series back into their parent.
+    # One half of a split series received a "10" prefix on its series
+    # number (e.g. '05' became '1005'). Modifies and returns sidecars.
+    all_str_series = [str(series).zfill(2) for series in sidecars]
+    delete = []
+    for series in sidecars:
+        str_series = str(series)
+        if not str_series.startswith("10"):
+            continue
+        if len(str_series) < 4:
+            continue
+        trimmed_series = str_series[2:]
+        if trimmed_series not in all_str_series:
+            # False alarm, just a weird custom series
+            continue
+        sidecars[int(trimmed_series)].extend(sidecars[series])
+        delete.append(series)
+    for series in delete:
+        del sidecars[series]
+    return sidecars
\ No newline at end of file

From 601dabf7e68fb99b344a74fab0b16bb27412076d Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Fri, 8 Aug 2025 13:42:52 -0400
Subject: [PATCH 39/45] [ENH] Update the NiiLinkExporter to better locate bids
 files

---
 datman/exporters/bids.py | 504 ++++++++++++++++++++++++++++++++-------
 1 file changed, 414 insertions(+), 90 deletions(-)

diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py
index 6a1bbf17..e73043a4 100644
--- a/datman/exporters/bids.py
+++ b/datman/exporters/bids.py
@@ -3,17 +3,19 @@
 import os
 import logging
 import json
+import re
 from dataclasses import dataclass
 from pathlib import Path
 
 import datman.config
 from .base import SessionExporter
 from datman.exceptions import MetadataException
-from datman.utils import locate_metadata
+from datman.utils import locate_metadata, read_blacklist, get_relative_source
+from datman.scanid import make_filename
 
 logger = logging.getLogger(__name__)
 
-__all__ = ["BidsExporter", "BidsOptions"]
+__all__ = ["BidsExporter", "NiiLinkExporter", "BidsOptions"]
 
 
 @dataclass
@@ -124,8 +126,6 @@ def get_contents(self):
         outputs = {}
 
 
-
-
 class NiiLinkExporter(SessionExporter):
     """Populates a study's nii folder with symlinks pointing to the bids dir.
     """
@@ -137,13 +137,12 @@ def __init__(self, config, session, importer, **kwargs):
         self.ident = session._ident
         self.output_dir = session.nii_path
         self.bids_path = session.bids_path
+        self.repeat = session.session
         self.config = config
         self.tags = config.get_tags(site=session.site)
 
         super().__init__(config, session, importer, **kwargs)
 
-        self.dm_names = self.get_dm_names()
-
     @classmethod
     def get_output_dir(cls, session):
         return session.nii_path
@@ -151,6 +150,62 @@ def get_output_dir(cls, session):
     def needs_raw_data(self):
         return False
 
+    def outputs_exist(self):
+        """Check that each non-blacklisted scan exists in the nii folder."""
+        sidecars = self.get_bids_sidecars()
+        name_map = self.make_dm_names(sidecars)
+
+        for dm_name in name_map:
+            if read_blacklist(scan=dm_name, config=self.config):
+                continue
+
+            full_path = os.path.join(self.output_dir, dm_name + self.ext)
+            if not os.path.exists(full_path):
+                return False
+
+        return True
+
+    def export(self, *args, **kwargs):
+        """Link each datman-named scan in the nii folder to its bids file."""
+        sidecars = self.get_bids_sidecars()
+        name_map = self.make_dm_names(sidecars)
+        if self.dry_run:
+            logger.info("Dry run: Skipping making nii folder links for "
+                        f"mapping {name_map}")
+            return
+
+        if self.outputs_exist():
+            return
+
+        self.make_output_dir()
+
+        for dm_name, bids_name in name_map.items():
+            self.link_scan(dm_name, bids_name)
+
+    def link_scan(self, dm_name: str, bids_root: Path | str):
+        """Create a symlink in the datman style that points to a bids file.
+
+        Args:
+            dm_name (:obj:`str`): A valid datman file name.
+            bids_root (:obj:`pathlib.Path` or :obj:`str`): The full path
+                to a bids file (without an extension).
+        """
+        from glob import glob
+        if read_blacklist(scan=dm_name, config=self.config):
+            logger.debug(f"Ignoring blacklisted scan {dm_name}")
+            return
+        # bids_root may be a Path, so format it instead of using '+'.
+        base_target = os.path.join(self.output_dir, dm_name)
+        for source in glob(f"{bids_root}*"):
+            ext = get_extension(source)
+            target = base_target + ext
+
+            if is_broken_link(target):
+                remove_broken_link(target)
+
+            rel_source = get_relative_source(source, target)
+            make_link(rel_source, target)
+
     def get_dm_names(self):
         """Get the datman-style scan names for an entire XNAT experiment.
 
@@ -176,124 +231,393 @@ def get_dm_names(self):
             names[series_num] = scan.names
         return names
 
-    def get_bids_sidecars(self):
-        """Get all sidecars from a BIDS session.
+    def get_bids_sidecars(self) -> dict[int, list]:
+        """Get all sidecars from the session's BIDS folder.
 
         Returns:
-            :obj:`dict`: A map from the series number to the sidecar(s) that
-                belong to that series.
+            :obj:`dict`: A map from the series number to a list of the JSON
+                sidecar contents that result from that series.
         """
         sidecars = {}
         bids_folder = Path(self.bids_path)
         for sidecar in bids_folder.rglob("*.json"):
-            try:
-                contents = sidecar.read_text(encoding="utf-8")
-            except (UnicodeDecodeError, OSError) as e:
-                logger.debug(
-                    f"Ignoring unreadable json sidecar {sidecar} - {e}"
-                )
-                continue
 
-            try:
-                data = json.loads(contents)
-            except (json.JSONDecodeError, TypeError) as e:
-                logger.debug(f"Ignoring invalid json sidecar {sidecar} - {e}")
+            contents = self.read_sidecar(sidecar)
+            if not contents:
                 continue
 
-            data["path"] = sidecar
+            if not self.matches_repeat(contents):
+                continue
 
-            if "SeriesNumber" not in data:
+            if "SeriesNumber" not in contents:
+                logger.debug(
+                    "Ignoring malformed sidecar file (missing SeriesNumber): "
+                    f"{sidecar}"
+                )
                 continue
 
-            # Need code later to handle split series (do they always
-            # prefix series number with "10"?)
-            # -> For new CALM sessions it doesnt, it just allows them to
-            #   retain the original series number (and duplicates it)
-            #   not sure if this is because of CALM or a change in dcm2niix
-            #   or a change in dcm2bids
             try:
-                series_num = int(data["SeriesNumber"])
+                series_num = int(contents["SeriesNumber"])
             except ValueError:
+                logger.debug(
+                    f"Ignoring non-numeric series number in {sidecar}"
+                )
                 continue
 
-            sidecars.setdefault(series_num, []).append(data)
+            sidecars.setdefault(series_num, []).append(contents)
 
-        fix_split_series(sidecars)
+        self.fix_split_series_nums(sidecars)
 
         return sidecars
 
+    def read_sidecar(self, sidecar: str | Path) -> dict:
+        """Read the contents of a JSON sidecar file.
 
-def get_bids_sidecars(bids_path, repeat):
-    """Get all sidecars from a BIDS session.
-
-    Returns:
-        :obj:`dict`: A map from the series number to the sidecar(s) that
-            belong to that series.
-    """
-    bids_folder = Path(bids_path)
-    sidecars = {}
+        NOTE: This adds the path of the file itself under the key 'Path'
+        """
+        if not isinstance(sidecar, Path):
+            sidecar = Path(sidecar)
 
-    for sidecar in bids_folder.rglob("*.json"):
         try:
             contents = sidecar.read_text(encoding="utf-8")
         except (UnicodeDecodeError, OSError) as e:
             logger.debug(
-                f"Ignoring unreadable json sidecar {sidecar} - {e}"
+                f"Sidecar file is unreadable {sidecar} - {e}"
             )
-            continue
+            return {}
 
         try:
             data = json.loads(contents)
         except (json.JSONDecodeError, TypeError) as e:
-            logger.debug(f"Ignoring invalid json sidecar {sidecar} - {e}")
-            continue
+            logger.debug(f"Invalid json sidecar {sidecar} - {e}")
+            return {}
 
-        data["path"] = sidecar
+        data["Path"] = sidecar
 
-        if "SeriesNumber" not in data:
-            continue
+        return data
 
-        if "Repeat" not in data:
-            if repeat == "01":
-                # Assume sidecar belongs to this session, as there's
-                # usually only 1 'repeat' anyway
-                data["Repeat"] = "01"
-            else:
+    def matches_repeat(self, sidecar: dict) -> bool:
+        """Check if a sidecar matches the current session's 'repeat'.
+
+        The 'repeat' number is used to track when a scan session was stopped
+        and restarted during a visit. Most of the time it will be '01'.
+        """
+        if "Repeat" not in sidecar:
+            # If this session is the first 'repeat' it's safe to assume an
+            # untagged sidecar belongs to it, since usually there's only one
+            # 'repeat' anyway.
+            return self.repeat == "01"
+        return sidecar["Repeat"] == self.repeat
+
+    def fix_split_series_nums(self, sidecars: dict[int, list]
+            ) -> dict[int, list]:
+        """Attempt to correct series nums that have been prefixed with '10'.
+
+        Some older versions of dcm2niix/dcm2bids liked to prefix half of a
+        split series' number with '10' rather than allowing all sidecars
+        to share the original series num. This attempts to identify when
+        that has happened and find the original series number for these
+        files.
+        """
+        all_series = [str(series).zfill(2) for series in sidecars]
+        must_delete = []
+
+        for series in sidecars:
+            str_series = str(series)
+
+            if not str_series.startswith("10"):
+                continue
+
+            if len(str_series) < 4:
                 continue
 
-        if data["Repeat"] != repeat:
-            continue
+            trimmed_series = str_series[2:]
+            if trimmed_series not in all_series:
+                # False alarm, probably not a mutated series number
+                continue
 
-        try:
-            series_num = int(data["SeriesNumber"])
-        except ValueError:
-            continue
-
-        sidecars.setdefault(series_num, []).append(data)
-
-    fix_split_series(sidecars)
-
-    return sidecars
-
-
-def fix_split_series(sidecars):
-    # Handle legacy dcm2bids/dcm2niix split sessions which recieved a
-    # "10" prefix to their series numbers (e.g. '05' would become '1005'
-    # for one half of a split fmap)
-    all_str_series = [str(series).zfill(2) for series in sidecars]
-    delete = []
-    for series in sidecars:
-        str_series = str(series)
-        if not str_series.startswith("10"):
-            continue
-        if len(str_series) < 4:
-            continue
-        trimmed_series = str_series[2:]
-        if trimmed_series not in all_str_series:
-            # False alarm, just a weird custom series
-            continue
-        sidecars[int(trimmed_series)].extend(sidecars[series])
-        delete.append(series)
-    for series in delete:
-        del sidecars[series]
-    return sidecars
\ No newline at end of file
+            sidecars[int(trimmed_series)].extend(sidecars[series])
+            must_delete.append(series)
+
+        for series in must_delete:
+            del sidecars[series]
+
+        return sidecars
+
+    def make_dm_names(self, sidecars: dict[int, list]) -> dict[str, Path]:
+        """Create a datman-style name for each identifiable sidecar.
+
+        Args:
+            sidecars (`dict`): A dictionary mapping series numbers to a list
+                of bids sidecar files generated by that series.
+
+        Returns:
+            dict: a dictionary mapping a datman-style filename to the bids
+                sidecar path (minus extension) it belongs to.
+        """
+        found_names = {}
+        reqs = self.get_tag_requirements()
+        for series in sidecars:
+
+            temp_names = {}
+            for item in sidecars[series]:
+
+                found = self.find_tag(item, reqs)
+
+                if not found:
+                    logger.debug(f"No tag matches {item['Path']}, ignoring.")
+                    continue
+
+                if len(found) > 1:
+                    logger.error(
+                        f"Multiple tags ({found}) match sidecar "
+                        f"{item['Path']}. Ignoring it. Please update "
+                        "configuration so at most one tag matches."
+                    )
+                    continue
+
+                dm_name = make_filename(
+                    self.ident,
+                    found[0],
+                    series,
+                    item["SeriesDescription"]
+                )
+
+                temp_names.setdefault(dm_name, []).append(item)
+
+            found_names = self.handle_duplicate_names(found_names, temp_names)
+
+        return found_names
+
+    def get_tag_requirements(self) -> dict[str, dict]:
+        """Read and reformat user configuration for all tags.
+
+        As described in datman's configuration documentation, at a minimum each
+        tag must define a 'SeriesDescription' regular expression. Tags
+        may optionally include a 'Bids' section, alongside datman's
+        'Pattern' and 'Count' fields for a tag to make it more restrictive or
+        accurate.
+
+        If included, the 'Bids' section should contain a list of sidecar field
+        names to check when determining if a tag can be applied. These must
+        match the sidecar's fields verbatim (case-sensitive). Each field name
+        may then point to either:
+
+            - a literal string to be matched
+            - a dictionary of settings
+
+        The dictionary of settings may include the following keys:
+
+        - **Pattern** (`str` or list, optional): May be a literal string or a
+          regular expression in Python format (e.g., use `.*` not `*`), or a
+          list of literal strings. Optional if `Exclude` is given. If omitted
+          and `Exclude` is used, the presence of the field name alone
+          excludes a sidecar from taking the tag.
+        - **Regex** (`bool`, optional): Indicates whether `Pattern` is a regex
+          or a string literal. Default is `False`.
+        - **Exclude** (`bool`, optional): Indicates whether to exclude sidecars
+          that match the pattern (i.e., take the inverse). Default is `False`.
+
+        Examples:
+            Below are some YAML examples of commonly used configuration.
+
+            Prevent any sidecar with an 'IntendedFor' field from matching
+            a tag:
+
+                Bids:
+                    IntendedFor:
+                        Exclude: True
+
+            Match a sidecar only if the PhaseEncodingDirection is exactly 'j':
+
+                Bids:
+                    PhaseEncodingDirection: 'j'
+
+            Match a sidecar only if the ImageType contains 'DERIVED':
+
+                Bids:
+                    ImageType:
+                        Pattern: 'DERIVED'
+                        Regex: True
+
+        Returns:
+            A dictionary mapping each tag name to the requirements that
+                must be met for a tag to be applied to a BIDs sidecar.
+        """
+        reqs = {}
+        for tag in self.tags:
+
+            conf = self.tags.get(tag)
+
+            if is_malformed(conf):
+                logger.error(
+                    f"Ignoring tag {tag} - Incorrectly configured. Each tag "
+                    "must contain a 'Pattern' section and each 'Pattern', at "
+                    "a minimum, must contain a 'SeriesDescription'. Consult "
+                    "the docs for more info.")
+                continue
+
+            regex = conf["Pattern"]["SeriesDescription"]
+            if isinstance(regex, list):
+                regex = "|".join(regex)
+
+            tag_reqs = {
+                "SeriesDescription": {
+                    "Pattern": regex,
+                    "Regex": True,
+                    "Exclude": False
+                }
+            }
+
+            bids_conf = conf.get("Bids", {})
+            for field in bids_conf:
+                # Ensure consistent formatting for settings
+                if isinstance(bids_conf[field], str):
+                    pattern = bids_conf[field]
+                    regex = False
+                    exclude = False
+                else:
+                    pattern = bids_conf[field].get("Pattern", "")
+                    if isinstance(pattern, list):
+                        pattern = str(pattern)
+                    regex = bids_conf[field].get("Regex", False)
+                    exclude = bids_conf[field].get("Exclude", False)
+
+                tag_reqs[field] = {
+                    "Pattern": pattern,
+                    "Regex": regex,
+                    "Exclude": exclude
+                }
+
+            reqs[tag] = tag_reqs
+        return reqs
+
+    def find_tag(self,
+                 sidecar: dict,
+                 requirements: dict | None = None) -> list:
+        """Find which configured tags, if any, can be applied to a sidecar.
+
+        Args:
+            sidecar (`dict`): The contents of a json sidecar.
+            requirements (`dict`, optional): The requirements to match
+                each accepted tag. Default is 'None', in which case the
+                default datman configuration will be consulted.
+
+        Returns:
+            A list of tag names that the sidecar matches.
+        """
+        if not requirements:
+            requirements = self.get_tag_requirements()
+
+        found = []
+        for tag in requirements:
+
+            match = True
+            for field in requirements[tag]:
+                pattern = requirements[tag][field].get("Pattern", "")
+                is_regex = requirements[tag][field].get("Regex", False)
+                exclude = requirements[tag][field].get("Exclude", False)
+
+                if field not in sidecar:
+                    if not exclude:
+                        # Absence of an expected field fails tag match
+                        match = False
+                    continue
+
+                if exclude and not pattern:
+                    # Excluded field is in sidecar, so doesn't match tag
+                    match = False
+                    continue
+
+                actual = sidecar[field]
+                if isinstance(actual, list):
+                    actual = str(actual)
+
+                if is_regex:
+                    comparator = re.search
+                else:
+                    comparator = re.fullmatch
+
+                if not comparator(pattern, actual, re.IGNORECASE):
+                    match = False
+                elif exclude:
+                    # Tag does match, but settings indicate to take inverse
+                    match = False
+            if match:
+                found.append(tag)
+
+        return found
+
+    def handle_duplicate_names(self,
+                               existing_names: dict[str, str],
+                               new_entries: dict[str, dict]
+        ) -> dict[str, str]:
+        """Make duplicated names unique.
+
+        Sometimes, as with multi-echo scans, multiple BIDs files will create
+        the same datman name. This ensures a unique name exists for each.
+
+        Args:
+            existing_names (`dict`): The dictionary to add the fixed name
+                entries to.
+            new_entries (`dict`): New entries that may contain duplicated
+                datman-style names.
+
+        Returns:
+            dict[str, str]: The existing_names dictionary with all
+                new entries merged in with unique names.
+        """
+        for name in new_entries:
+
+            if len(new_entries[name]) == 1:
+                existing_names[name] = remove_extension(
+                    new_entries[name][0]["Path"]
+                )
+                continue
+
+            for sidecar in new_entries[name]:
+                if "EchoNumber" not in sidecar:
+                    logger.error(
+                        "Multiple BIDs files result in same file name "
+                        f"'{name}'. Please update configuration to help "
+                        f"identify file: {sidecar['Path']}"
+                    )
+                    continue
+                new_name = name + f"_ECHO-{sidecar['EchoNumber']}"
+                existing_names[new_name] = remove_extension(sidecar['Path'])
+
+        return existing_names
+
+def is_malformed(config: dict) -> bool:
+    """Check if a tag's configuration is unusably malformed.
+    """
+    if "Pattern" not in config:
+        return True
+    if "SeriesDescription" not in config["Pattern"]:
+        return True
+    return False
+
+def remove_extension(path: Path) -> Path:
+    """Remove all extensions from a path.
+    """
+    while path.suffix:
+        path = path.with_suffix("")
+    return path
+
+def is_broken_link(symlink: str) -> bool:
+    return os.path.islink(symlink) and not os.path.exists(symlink)
+
+def remove_broken_link(target: str):
+    try:
+        os.unlink(target)
+    except OSError as e:
+        logger.error(f"Failed to remove broken symlink {target} - {e}")
+    return
+
+def make_link(source: str, target: str):
+    try:
+        os.symlink(source, target)
+    except FileExistsError:
+        pass
+    except OSError as e:
+        logger.error(f"Failed to create {target} - {e}")
\ No newline at end of file

From b7fe7af4f67f2d919b3ff1929aaef59380ce2c64 Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Tue, 12 Aug 2025 16:04:42 -0400
Subject: [PATCH 40/45] [FIX] Make NiiLinkExporter less spammy, handle ints in
 config

---
 datman/exporters/bids.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py
index e73043a4..fec7d39b 100644
--- a/datman/exporters/bids.py
+++ b/datman/exporters/bids.py
@@ -369,7 +369,7 @@ def make_dm_names(self, sidecars: dict[int, list]) -> dict[str, Path]:
                     continue
 
                 if len(found) > 1:
-                    logger.error(
+                    logger.debug(
                         f"Multiple tags ({found}) match sidecar "
                         f"{item['Path']}. Ignoring it. Please update "
                         "configuration so at most one tag matches."
@@ -472,13 +472,13 @@ def get_tag_requirements(self) -> dict[str, dict]:
             bids_conf = conf.get("Bids", {})
             for field in bids_conf:
                 # Ensure consistent formatting for settings
-                if isinstance(bids_conf[field], str):
-                    pattern = bids_conf[field]
+                if isinstance(bids_conf[field], (str, int)):
+                    pattern = str(bids_conf[field])
                     regex = False
                     exclude = False
                 else:
                     pattern = bids_conf[field].get("Pattern", "")
-                    if isinstance(pattern, list):
+                    if not isinstance(pattern, str):
                         pattern = str(pattern)
                     regex = bids_conf[field].get("Regex", False)
                     exclude = bids_conf[field].get("Exclude", False)
@@ -530,7 +530,7 @@ def find_tag(self,
                     continue
 
                 actual = sidecar[field]
-                if isinstance(actual, list):
+                if not isinstance(actual, str):
                     actual = str(actual)
 
                 if is_regex:

From 72a3e0c4412bd36425bab52d85ab96fe4d2c6956 Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Tue, 12 Aug 2025 16:55:22 -0400
Subject: [PATCH 41/45] [FIX] Correct typos and missing imports

---
 datman/exporters/bids.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py
index fec7d39b..fb6d7bcc 100644
--- a/datman/exporters/bids.py
+++ b/datman/exporters/bids.py
@@ -4,13 +4,15 @@
 import logging
 import json
 import re
+from glob import glob
 from dataclasses import dataclass
 from pathlib import Path
 
 import datman.config
 from .base import SessionExporter
 from datman.exceptions import MetadataException
-from datman.utils import locate_metadata, read_blacklist, get_relative_source
+from datman.utils import (locate_metadata, read_blacklist, get_relative_source,
+                          get_extension)
 from datman.scanid import make_filename
 
 logger = logging.getLogger(__name__)
@@ -171,7 +173,7 @@ def export(self, *args, **kwargs):
 
         if self.dry_run:
             logger.info("Dry run: Skipping making nii folder links for "
-                        f"mapping {self.name_map}")
+                        f"mapping {name_map}")
             return
 
         if self.outputs_exist():
@@ -179,7 +181,7 @@ def export(self, *args, **kwargs):
 
         self.make_output_dir()
 
-        for dm_name, bids_name in self.name_map.items():
+        for dm_name, bids_name in name_map.items():
             self.link_scan(dm_name, bids_name)
 
     def link_scan(self, dm_name: str, bids_root: Path | str):
@@ -196,7 +198,7 @@ def link_scan(self, dm_name: str, bids_root: Path | str):
             return
 
         base_target = os.path.join(self.output_dir, dm_name)
-        for source in glob(bids_file + "*"):
+        for source in glob(str(bids_root) + "*"):
             ext = get_extension(source)
             target = base_target + ext
 

From b6502262811b23c26785cc804d9afd1f00ee9689 Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Thu, 14 Aug 2025 23:29:52 -0400
Subject: [PATCH 42/45] [FIX] Update bids version selection, re-add bids
 functionality

---
 datman/exporters/__init__.py    |  47 ++++--
 datman/exporters/bids.py        | 265 +++++++++++++++++++++++---------
 datman/exporters/bids_legacy.py |   6 -
 3 files changed, 223 insertions(+), 95 deletions(-)

diff --git a/datman/exporters/__init__.py b/datman/exporters/__init__.py
index 63c62252..378ec493 100644
--- a/datman/exporters/__init__.py
+++ b/datman/exporters/__init__.py
@@ -2,7 +2,9 @@
 import importlib
 import pkgutil
 import logging
+from packaging.version import parse
 
+from datman.utils import check_dependency_configured
 from .base import Exporter, SessionExporter, SeriesExporter
 
 logger = logging.getLogger(__name__)
@@ -30,6 +32,24 @@ def _load_contents(module_name):
     __all__.extend(contents)
 
 
+def is_runnable_container(container):
+    """Check if a container is able to be run.
+    """
+    try:
+        check_dependency_configured("apptainer", shell_cmd="apptainer")
+    except EnvironmentError:
+        logger.error(f"apptainer is not available, ignoring container.")
+        return False
+
+    if not os.path.exists(container):
+        logger.error(
+            f"Container path does not exist - {container}, ignoring container."
+        )
+        return False
+
+    return True
+
+
 # Load everything from exporters folder (except bids exporters) so contents
 # can be accessed as 'datman.exporters' instead of 'datman.exporters.xxx'
 for _, module_name, _ in pkgutil.iter_modules([os.path.dirname(__file__)]):
@@ -42,25 +62,22 @@ def _load_contents(module_name):
 
 if os.getenv("BIDS_CONTAINER"):
     # Container is in use, load bids.py
-    _load_contents("bids")
-    DCM2BIDS_FOUND = True
+    if is_runnable_container(os.getenv("BIDS_CONTAINER")):
+        _load_contents("bids")
+        DCM2BIDS_FOUND = True
+    else:
+        logger.error(f"Cannot use dcm2bids container, ignoring bids.")
+        DCM2BIDS_FOUND = False
 else:
     try:
-        from dcm2bids import dcm2bids, Dcm2bids
-    except ImportError:
-        # dcm2bids is either not installed or version >= 3
-        try:
-            import dcm2bids
-        except ImportError:
-            # No dcm2bids available at all
-            DCM2BIDS_FOUND = False
+        version = importlib.metadata.version("dcm2bids")
+    except importlib.metadata.PackageNotFoundError:
+        DCM2BIDS_FOUND = False
+    else:
+        if parse(version) < parse("3"):
+            _load_contents("bids_legacy")
         else:
-            # dcm2bids is installed and version > 3, use bids.py
             _load_contents("bids")
-            DCM2BIDS_FOUND = True
-    else:
-        # dcm2bids is installed and version < 3, use bids_legacy.py
-        _load_contents("bids_legacy")
         DCM2BIDS_FOUND = True
 
 
diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py
index fb6d7bcc..7712c7f0 100644
--- a/datman/exporters/bids.py
+++ b/datman/exporters/bids.py
@@ -12,7 +12,7 @@
 from .base import SessionExporter
 from datman.exceptions import MetadataException
 from datman.utils import (locate_metadata, read_blacklist, get_relative_source,
-                          get_extension)
+                          get_extension, write_json, run)
 from datman.scanid import make_filename
 
 logger = logging.getLogger(__name__)
@@ -85,15 +85,14 @@ def __init__(self, config, session, importer, bids_opts=None, **kwargs):
         self.bids_tmp = os.path.join(session.bids_root, "tmp_dcm2bids",
                                      f"{session.bids_sub}_{session.bids_ses}")
         self.output_dir = session.bids_path
-        self.keep_dcm = bids_opts.keep_dcm if bids_opts else False
-        self.force_dcm2niix = bids_opts.force_dcm2niix if bids_opts else False
-        self.clobber = bids_opts.clobber if bids_opts else False
-        self.log_level = bids_opts.log_level if bids_opts else "INFO"
-        self.dcm2bids_config = bids_opts.dcm2bids_config if bids_opts else None
-        self.refresh = bids_opts.refresh if bids_opts else False
+        self.refresh = bids_opts.refresh
+        self.clobber = bids_opts.clobber
+        self.opts = bids_opts
 
         super().__init__(config, session, importer, **kwargs)
-        return
+
+    def needs_raw_data(self):
+        return not self.outputs_exist() and not self.refresh
 
     def outputs_exist(self):
         if self.refresh:
@@ -109,23 +108,165 @@ def outputs_exist(self):
             )
             return False
 
-        out_dir = Path(self.output_dir)
-        if not out_dir.exists():
+        if not os.path.exists(self.output_dir):
             return False
 
-        json_files = out_dir.rglob("*.json")
-
-
-        expected_scans = self.get_expected_scans()
-        actual_scans = self.get_actual_scans()
-        _, missing = self.check_contents(expected_scans, actual_scans)
-        if missing:
+        if not self.session._bids_inventory:
             return False
 
+        # Assume everything exists if anything does :(
         return True
 
-    def get_contents(self):
-        outputs = {}
+    def export(self, raw_data_dir, **kwargs):
+        if self.outputs_exist():
+            return
+
+        if self.dry_run:
+            logger.info(f"Dry run: Skipping bids export to {self.output_dir}")
+            return
+
+        # Store user settings in case they change during export
+        orig_force = self.opts.force_dcm2niix
+        orig_refresh = self.refresh
+
+        # Does this still work for repeats?
+        if int(self.repeat) > 1:
+            # Must force dcm2niix export if it's a repeat.
+            self.force_dcm2niix = True
+
+        self.make_output_dir()
+
+        try:
+            self.run_dcm2bids(raw_data_dir)
+        except Exception as e:
+            logger.error(f"Failed to extract to BIDs - {e}")
+
+        # For CLM CHO / basic format. Gotta make sure apptainer exists
+        # apptainer run \
+        # -B ${outputdir} \
+        # /scratch/edickie/CLM01_pilots/containers/dcm2bids-3.2.0.sif \
+        # -d ${outputdir}/dicoms/CLM01_CHO_00000003_01_SE01_MR/ \
+        # -p "sub-CHO00000004" \
+        # -s "ses-01" \
+        # -c ${outputdir}/dcm2bids_3chorom.json \
+        # -o ${outputdir}/bids \
+        # --auto_extract_entities
+
+        # Test command. Exporter may need to 'hang on to' the metadata folder
+        # path and the file name for the dcm2bids.json (since the file given
+        # can be named anything and shouldn't be assumed)
+        # Note also: all bound paths must exist before running
+        # apptainer run -B /scratch/dawn/temp_stuff/new_bids/test_archive/tmp_extract/:/input -B /scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/metadata:/metadata -B /scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids:/output ${BIDS_CONTAINER} -d /input -p "sub-CHO00000003" -s "ses-01" -c /metadata/dcm2bids.json -o /output --auto_extract_entities
+
+        if int(self.repeat) > 1:
+            # Must run a second time to move the new niftis out of the tmp dir
+            self.force_dcm2niix = False
+            self.refresh = True
+            try:
+                self.run_dcm2bids(raw_data_dir)
+            except Exception as e:
+                logger.error(f"Failed to extract data. {e}")
+
+        self.force_dcm2niix = orig_force
+        self.refresh = orig_refresh
+
+        try:
+            self.add_repeat_num()
+        except (PermissionError, JSONDecodeError):
+            logger.error(
+                "Failed to add repeat numbers to sidecars in "
+                f"{self.output_dir}. If a repeat scan is added, scans may "
+                "incorrectly be tagged as belonging to the later repeat."
+            )
+
+    def run_dcm2bids(self, raw_data_dir):
+        input_dir = self._get_scan_dir(raw_data_dir)
+
+        if self.refresh and not os.path.exists(input_dir):
+            logger.error(
+                f"Cannot refresh contents of {self.output_dir}, no "
+                f"files found at {input_dir}.")
+            return
+
+        cmd = self.make_command(input_dir)
+        return_code, output = run(cmd)
+        print(return_code)
+        print(output)
+
+    def _get_scan_dir(self, download_dir):
+        if self.refresh:
+            # Use existing tmp_dir instead of raw dcms
+            return self.bids_tmp
+        return download_dir
+
+    def make_command(self, raw_data_dir):
+        # CLM01_CHO_00000003_01_01
+
+        # ???? is this an issue because I downloaded them?
+        # dcm_dic = 'scans/9_DTI_HCP_b2400_AP_ADC'
+
+        # bids_sub = 'CHO00000003'
+        # bids_ses = '01'
+        # repeat = '01'
+        # bids_folder = '/scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids/'
+        # bids_tmp = '/scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids/tmp_dcm2bids/sub-CHO00000003_ses-01'
+        # output_dir = '/scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids/sub-CHO00000003/ses-01'
+
+        # raw_data_dir = "/scratch/dawn/temp_stuff/new_bids/test_archive/tmp_extract/"
+
+        conf_dir, conf_file = os.path.split(self.opts.dcm2bids_config)
+
+        container_path = os.getenv("BIDS_CONTAINER")
+        if container_path:
+            cmd = [
+                "apptainer run",
+                f"-B {raw_data_dir}:/input",
+                f"-B {conf_dir}:/config",
+                f"-B {self.bids_folder}:/output",
+                f"{container_path}",
+                "-d /input",
+                f"-c /config/{conf_file}",
+                "-o /output"
+            ]
+        else:
+            cmd = [
+                "dcm2bids",
+                f"-d {raw_data_dir}",
+                f"-c {self.opts.dcm2bids_config}",
+                f"-o {self.bids_folder}"
+            ]
+
+        cmd.extend([
+            f"-p '{self.bids_sub}'",
+            f"-s '{self.bids_ses}'",
+            f"-l {self.opts.log_level}"
+        ])
+
+        if self.opts.clobber:
+            cmd.append("--clobber")
+
+        if self.opts.force_dcm2niix:
+            cmd.append("--forceDcm2niix")
+
+        for item in self.opts.extra_opts:
+            cmd.append(f"--{item}")
+
+        return cmd
+
+    def add_repeat_num(self):
+        for sidecar in Path(self.output_dir).rglob("*.json"):
+
+            contents = read_sidecar(sidecar)
+            if not contents:
+                continue
+
+            if "Repeat" in contents:
+                continue
+
+            contents["Repeat"] = self.repeat
+            # Remove "Path" so it doesn't get written to the output file
+            del contents["Path"]
+            write_json(sidecar, contents)
 
 
 class NiiLinkExporter(SessionExporter):
@@ -208,31 +349,6 @@ def link_scan(self, dm_name: str, bids_root: Path | str):
             rel_source = get_relative_source(source, target)
             make_link(rel_source, target)
 
-    def get_dm_names(self):
-        """Get the datman-style scan names for an entire XNAT experiment.
-
-        This is used to
-            1) Ensure the contents of the nii folder matches what may have
-               been produced with an old-style NiiExporter
-            2) To predict if an expected scan didn't extract correctly into
-               the bids folder.
-
-        Returns:
-            dict: A map of each series number to the name (or
-                names) the series would be exported under.
-        """
-        names = {}
-        for scan in self.experiment.scans:
-            try:
-                series_num = int(scan.series)
-            except ValueError:
-                # Ignore xnat scans with non-numeric series numbers.
-                # These are often of the form MR-XX and result from duplicated
-                # uploads / errors when merging on xnat.
-                continue
-            names[series_num] = scan.names
-        return names
-
     def get_bids_sidecars(self) -> dict[int, list]:
         """Get all sidecars from the session's BIDS folder.
 
@@ -244,7 +360,7 @@ def get_bids_sidecars(self) -> dict[int, list]:
         bids_folder = Path(self.bids_path)
         for sidecar in bids_folder.rglob("*.json"):
 
-            contents = self.read_sidecar(sidecar)
+            contents = read_sidecar(sidecar)
             if not contents:
                 continue
 
@@ -272,32 +388,6 @@ def get_bids_sidecars(self) -> dict[int, list]:
 
         return sidecars
 
-    def read_sidecar(self, sidecar: str | Path) -> dict:
-        """Read the contents of a JSON sidecar file.
-
-        NOTE: This adds the path of the file itself under the key 'Path'
-        """
-        if not isinstance(sidecar, Path):
-            sidecar = Path(sidecar)
-
-        try:
-            contents = sidecar.read_text(encoding="utf-8")
-        except (UnicodeDecodeError, OSError) as e:
-            logger.debug(
-                f"Sidecar file is unreadable {sidecar} - {e}"
-            )
-            return {}
-
-        try:
-            data = json.loads(contents)
-        except (json.JSONDecodeError, TypeError) as e:
-            logger.debug(f"Invalid json sidecar {sidecar} - {e}")
-            return {}
-
-        data["Path"] = sidecar
-
-        return data
-
     def matches_repeat(self, sidecar: dict) -> bool:
         """Check if a sidecar matches the current session's 'repeat'.
 
@@ -451,7 +541,7 @@ def get_tag_requirements(self) -> dict[str, dict]:
 
             conf = self.tags.get(tag)
 
-            if is_malformed(conf):
+            if is_malformed_conf(conf):
                 logger.error(
                     f"Ignoring tag {tag} - Incorrectly configured. Each tag "
                     "must contain a 'Pattern' section and each 'Pattern', at "
@@ -590,7 +680,8 @@ def handle_duplicate_names(self,
 
         return existing_names
 
-def is_malformed(config: dict) -> bool:
+
+def is_malformed_conf(config: dict) -> bool:
     """Check if a tag's configuration is unusably malformed.
     """
     if "Pattern" not in config:
@@ -622,4 +713,30 @@ def make_link(source: str, target: str):
     except FileExistsError:
         pass
     except OSError as e:
-        logger.error(f"Failed to create {target} - {e}")
\ No newline at end of file
+        logger.error(f"Failed to create {target} - {e}")
+
+def read_sidecar(sidecar: str | Path) -> dict:
+    """Read the contents of a JSON sidecar file.
+
+    NOTE: This adds the path of the file itself under the key 'Path'
+    """
+    if not isinstance(sidecar, Path):
+        sidecar = Path(sidecar)
+
+    try:
+        contents = sidecar.read_text(encoding="utf-8")
+    except (UnicodeDecodeError, OSError) as e:
+        logger.debug(
+            f"Sidecar file is unreadable {sidecar} - {e}"
+        )
+        return {}
+
+    try:
+        data = json.loads(contents)
+    except (json.JSONDecodeError, TypeError) as e:
+        logger.debug(f"Invalid json sidecar {sidecar} - {e}")
+        return {}
+
+    data["Path"] = sidecar
+
+    return data
\ No newline at end of file
diff --git a/datman/exporters/bids_legacy.py b/datman/exporters/bids_legacy.py
index 1b6a5e86..74fabc75 100644
--- a/datman/exporters/bids_legacy.py
+++ b/datman/exporters/bids_legacy.py
@@ -380,12 +380,6 @@ def export(self, raw_data_dir, **kwargs):
         if self.outputs_exist():
             return
 
-        # Was this ever needed? The class should never have been made.
-        # if not DCM2BIDS_FOUND:
-        #     logger.info(f"Unable to export to {self.output_dir}, "
-        #                 "Dcm2Bids not found.")
-        #     return
-
         if self.dry_run:
             logger.info(f"Dry run: Skipping bids export to {self.output_dir}")
             return

From 6d72159318d12a38cf7243700753177befc938c2 Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Fri, 15 Aug 2025 19:40:37 -0400
Subject: [PATCH 43/45] [FIX] Ensure extra bids opts always defaults to list

---
 bin/dm_xnat_extract.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/dm_xnat_extract.py b/bin/dm_xnat_extract.py
index 9ed2ad9d..9d73f436 100755
--- a/bin/dm_xnat_extract.py
+++ b/bin/dm_xnat_extract.py
@@ -87,7 +87,7 @@ def main():
             bids_out=args.bids_out,
             log_level=log_level,
             refresh=args.refresh,
-            extra_opts=tool_opts.get('--dcm2bids-')
+            extra_opts=tool_opts.get('--dcm2bids-', [])
         )
     else:
         bids_opts = None

From 05f3554e8cc8e75491a6662efff6fb40bb49c1fd Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Fri, 15 Aug 2025 21:06:03 -0400
Subject: [PATCH 44/45] [PEP8] Fix style issues

---
 datman/exporters/__init__.py    |  11 ++-
 datman/exporters/base.py        |   4 +-
 datman/exporters/bids.py        | 145 +++++++++++++++-----------------
 datman/exporters/bids_legacy.py | 130 ++--------------------------
 datman/exporters/dashboard.py   |   4 +-
 datman/exporters/legacy.py      |   2 +-
 6 files changed, 85 insertions(+), 211 deletions(-)

diff --git a/datman/exporters/__init__.py b/datman/exporters/__init__.py
index 378ec493..655ed613 100644
--- a/datman/exporters/__init__.py
+++ b/datman/exporters/__init__.py
@@ -1,7 +1,10 @@
-import os
+"""Import classes used to export dicom data to various formats.
+"""
+
 import importlib
-import pkgutil
 import logging
+import os
+import pkgutil
 from packaging.version import parse
 
 from datman.utils import check_dependency_configured
@@ -38,7 +41,7 @@ def is_runnable_container(container):
     try:
         check_dependency_configured("apptainer", shell_cmd="apptainer")
     except EnvironmentError:
-        logger.error(f"apptainer is not available, ignoring container.")
+        logger.error("apptainer is not available, ignoring container.")
         return False
 
     if not os.path.exists(container):
@@ -66,7 +69,7 @@ def is_runnable_container(container):
         _load_contents("bids")
         DCM2BIDS_FOUND = True
     else:
-        logger.error(f"Cannot use dcm2bids container, ignoring bids.")
+        logger.error("Cannot use dcm2bids container, ignoring bids.")
         DCM2BIDS_FOUND = False
 else:
     try:
diff --git a/datman/exporters/base.py b/datman/exporters/base.py
index 6cd81237..af8c4208 100644
--- a/datman/exporters/base.py
+++ b/datman/exporters/base.py
@@ -6,9 +6,9 @@ class that inherits from either SessionExporter if it must work on an entire
 scan series at a time.
 """
 
-from abc import ABC, abstractmethod
-import os
 import logging
+import os
+from abc import ABC, abstractmethod
 
 logger = logging.getLogger(__name__)
 
diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py
index 7712c7f0..0f0a7879 100644
--- a/datman/exporters/bids.py
+++ b/datman/exporters/bids.py
@@ -4,23 +4,23 @@
 import logging
 import json
 import re
+import dataclasses
 from glob import glob
-from dataclasses import dataclass
 from pathlib import Path
 
 import datman.config
-from .base import SessionExporter
 from datman.exceptions import MetadataException
 from datman.utils import (locate_metadata, read_blacklist, get_relative_source,
                           get_extension, write_json, run)
 from datman.scanid import make_filename
+from .base import SessionExporter
 
 logger = logging.getLogger(__name__)
 
 __all__ = ["BidsExporter", "NiiLinkExporter", "BidsOptions"]
 
 
-@dataclass
+@dataclasses.dataclass
 class BidsOptions:
     """Helper class for options related to exporting to BIDS format.
     """
@@ -32,7 +32,7 @@ class BidsOptions:
     dcm2bids_config: str | None = None
     log_level: str = "INFO"
     refresh: bool = False
-    extra_opts: list = None
+    extra_opts: list = dataclasses.field(default_factory=list)
 
     def __post_init__(self):
         self.dcm2bids_config = self.get_bids_config(
@@ -73,10 +73,19 @@ def get_bids_config(self, config: datman.config.config,
 
 
 class BidsExporter(SessionExporter):
+    """Populates a study's bids folder.
+    """
 
     type = "bids"
 
-    def __init__(self, config, session, importer, bids_opts=None, **kwargs):
+    def __init__(
+            self,
+            config: datman.config.config,
+            session: 'datman.scan.Scan',
+            importer: 'datman.importers.SessionImporter',
+            bids_opts: BidsOptions = None,
+            **kwargs
+    ):
         self.dcm_dir = importer.dcm_subdir
         self.bids_sub = session._ident.get_bids_name()
         self.bids_ses = session._ident.timepoint
@@ -85,24 +94,22 @@ def __init__(self, config, session, importer, bids_opts=None, **kwargs):
         self.bids_tmp = os.path.join(session.bids_root, "tmp_dcm2bids",
                                      f"{session.bids_sub}_{session.bids_ses}")
         self.output_dir = session.bids_path
-        self.refresh = bids_opts.refresh
-        self.clobber = bids_opts.clobber
         self.opts = bids_opts
 
         super().__init__(config, session, importer, **kwargs)
 
-    def needs_raw_data(self):
-        return not self.outputs_exist() and not self.refresh
+    def needs_raw_data(self) -> bool:
+        return not self.outputs_exist() and not self.opts.refresh
 
-    def outputs_exist(self):
-        if self.refresh:
+    def outputs_exist(self) -> bool:
+        if self.opts.refresh:
             logger.info(
                 f"Re-comparing existing tmp folder for {self.output_dir}"
                 "to dcm2bids config to pull missed series."
             )
             return False
 
-        if self.clobber:
+        if self.opts.clobber:
             logger.info(
                 f"{self.output_dir} will be overwritten due to clobber option."
             )
@@ -114,10 +121,10 @@ def outputs_exist(self):
         if not self.session._bids_inventory:
             return False
 
-        # Assume everything exists if anything does :(
+        # Assume everything exists if anything does
         return True
 
-    def export(self, raw_data_dir, **kwargs):
+    def export(self, raw_data_dir: str, **kwargs):
         if self.outputs_exist():
             return
 
@@ -125,94 +132,63 @@ def export(self, raw_data_dir, **kwargs):
             logger.info(f"Dry run: Skipping bids export to {self.output_dir}")
             return
 
-        # Store user settings in case they change during export
-        orig_force = self.opts.force_dcm2niix
-        orig_refresh = self.refresh
-
-        # Does this still work for repeats?
         if int(self.repeat) > 1:
-            # Must force dcm2niix export if it's a repeat.
-            self.force_dcm2niix = True
+            # Must force dcm2niix if it's a repeat.
+            force_dcm2niix = True
+        else:
+            force_dcm2niix = self.opts.force_dcm2niix
 
         self.make_output_dir()
 
         try:
-            self.run_dcm2bids(raw_data_dir)
+            self.run_dcm2bids(raw_data_dir, force_dcm2niix=force_dcm2niix)
         except Exception as e:
             logger.error(f"Failed to extract to BIDs - {e}")
 
-        # For CLM CHO / basic format. Gotta make sure apptainer exists
-        # apptainer run \
-        # -B ${outputdir} \
-        # /scratch/edickie/CLM01_pilots/containers/dcm2bids-3.2.0.sif \
-        # -d ${outputdir}/dicoms/CLM01_CHO_00000003_01_SE01_MR/ \
-        # -p "sub-CHO00000004" \
-        # -s "ses-01" \
-        # -c ${outputdir}/dcm2bids_3chorom.json \
-        # -o ${outputdir}/bids \
-        # --auto_extract_entities
-
-        # Test command. Exporter may need to 'hang on to' the metadata folder
-        # path and the file name for the dcm2bids.json (since the file given
-        # can be named anything and shouldn't be assumed)
-        # Note also: all bound paths must exist before running
-        # apptainer run -B /scratch/dawn/temp_stuff/new_bids/test_archive/tmp_extract/:/input -B /scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/metadata:/metadata -B /scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids:/output ${BIDS_CONTAINER} -d /input -p "sub-CHO00000003" -s "ses-01" -c /metadata/dcm2bids.json -o /output --auto_extract_entities
-
         if int(self.repeat) > 1:
             # Must run a second time to move the new niftis out of the tmp dir
-            self.force_dcm2niix = False
-            self.refresh = True
             try:
-                self.run_dcm2bids(raw_data_dir)
+                self.run_dcm2bids(
+                    raw_data_dir, force_dcm2niix=False, refresh=True
+                )
             except Exception as e:
                 logger.error(f"Failed to extract data. {e}")
 
-        self.force_dcm2niix = orig_force
-        self.refresh = orig_refresh
-
         try:
             self.add_repeat_num()
-        except (PermissionError, JSONDecodeError):
+        except (PermissionError, json.JSONDecodeError):
             logger.error(
                 "Failed to add repeat numbers to sidecars in "
                 f"{self.output_dir}. If a repeat scan is added, scans may "
                 "incorrectly be tagged as belonging to the later repeat."
             )
 
-    def run_dcm2bids(self, raw_data_dir):
-        input_dir = self._get_scan_dir(raw_data_dir)
+    def run_dcm2bids(self, raw_data_dir: str, force_dcm2niix: bool = False,
+                     refresh: bool = False):
+        input_dir = self._get_scan_dir(raw_data_dir, refresh)
 
-        if self.refresh and not os.path.exists(input_dir):
+        if refresh and not os.path.exists(input_dir):
             logger.error(
                 f"Cannot refresh contents of {self.output_dir}, no "
                 f"files found at {input_dir}.")
             return
 
-        cmd = self.make_command(input_dir)
+        cmd = self.make_command(input_dir, force_dcm2niix)
         return_code, output = run(cmd)
-        print(return_code)
-        print(output)
+        if return_code:
+            logger.error(f"Failed when running dcm2bids - {output}")
 
-    def _get_scan_dir(self, download_dir):
-        if self.refresh:
+    def _get_scan_dir(self, download_dir: str, refresh: bool = False) -> str:
+        if refresh:
             # Use existing tmp_dir instead of raw dcms
             return self.bids_tmp
         return download_dir
 
-    def make_command(self, raw_data_dir):
-        # CLM01_CHO_00000003_01_01
-
-        # ???? is this an issue because I downloaded them?
-        # dcm_dic = 'scans/9_DTI_HCP_b2400_AP_ADC'
-
-        # bids_sub = 'CHO00000003'
-        # bids_ses = '01'
-        # repeat = '01'
-        # bids_folder = '/scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids/'
-        # bids_tmp = '/scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids/tmp_dcm2bids/sub-CHO00000003_ses-01'
-        # output_dir = '/scratch/dawn/temp_stuff/new_bids/test_archive/CLM01_CHO/data/bids/sub-CHO00000003/ses-01'
-
-        # raw_data_dir = "/scratch/dawn/temp_stuff/new_bids/test_archive/tmp_extract/"
+    def make_command(
+            self, raw_data_dir: str, force_dcm2niix: bool = False
+    ) -> list[str]:
+        """Construct the dcm2bids command based on user configuration.
+        """
 
         conf_dir, conf_file = os.path.split(self.opts.dcm2bids_config)
 
@@ -245,8 +221,8 @@ def make_command(self, raw_data_dir):
         if self.opts.clobber:
             cmd.append("--clobber")
 
-        if self.opts.force_dcm2niix:
-            cmd.append("--forceDcm2niix")
+        if force_dcm2niix:
+            cmd.append("--force_dcm2bids")
 
         for item in self.opts.extra_opts:
             cmd.append(f"--{item}")
@@ -254,6 +230,12 @@ def make_command(self, raw_data_dir):
         return cmd
 
     def add_repeat_num(self):
+        """Add the session's 'repeat' number to all of its json sidecars.
+
+        This is used to allow us to track which files belong to which session
+        when there's more than one (i.e. if there's an 01_02 and so forth
+        instead of just 01_01)
+        """
         for sidecar in Path(self.output_dir).rglob("*.json"):
 
             contents = read_sidecar(sidecar)
@@ -401,8 +383,10 @@ def matches_repeat(self, sidecar: dict) -> bool:
             return self.repeat == "01"
         return sidecar["Repeat"] == self.repeat
 
-    def fix_split_series_nums(self, sidecars: dict[int, list]
-            ) -> dict[int, list]:
+    def fix_split_series_nums(
+            self,
+            sidecars: dict[int, list]
+    ) -> dict[int, list]:
         """Attempt to correct series nums that have been prefixed with '10'.
 
         Some older versions of dcm2niix/dcm2bids liked to prefix half of a
@@ -640,10 +624,11 @@ def find_tag(self,
 
         return found
 
-    def handle_duplicate_names(self,
-                               existing_names: dict[str, str],
-                               new_entries: dict[str, dict]
-        ) -> dict[str, str]:
+    def handle_duplicate_names(
+        self,
+        existing_names: dict[str, str],
+        new_entries: dict[str, dict]
+    ) -> dict[str, str]:
         """Make duplicated names unique.
 
         Sometimes, as with multi-echo scans, multiple BIDs files will create
@@ -690,6 +675,7 @@ def is_malformed_conf(config: dict) -> bool:
         return True
     return False
 
+
 def remove_extension(path: Path) -> Path:
     """Remove all extensions from a path.
     """
@@ -697,15 +683,17 @@ def remove_extension(path: Path) -> Path:
         path = path.with_suffix("")
     return path
 
+
 def is_broken_link(symlink: str) -> bool:
     return os.path.islink(symlink) and not os.path.exists(symlink)
 
+
 def remove_broken_link(target: str):
     try:
         os.unlink(target)
     except OSError as e:
         logger.error(f"Failed to remove broken symlink {target} - {e}")
-    return
+
 
 def make_link(source: str, target: str):
     try:
@@ -715,6 +703,7 @@ def make_link(source: str, target: str):
     except OSError as e:
         logger.error(f"Failed to create {target} - {e}")
 
+
 def read_sidecar(sidecar: str | Path) -> dict:
     """Read the contents of a JSON sidecar file.
 
@@ -739,4 +728,4 @@ def read_sidecar(sidecar: str | Path) -> dict:
 
     data["Path"] = sidecar
 
-    return data
\ No newline at end of file
+    return data
diff --git a/datman/exporters/bids_legacy.py b/datman/exporters/bids_legacy.py
index 74fabc75..283a24ae 100644
--- a/datman/exporters/bids_legacy.py
+++ b/datman/exporters/bids_legacy.py
@@ -10,22 +10,21 @@
 manual intervention). It can also force dcm2bids to properly export repeat
 sessions into the same folder, where newer versions will simply ignore them.
 """
-from collections import OrderedDict
-from glob import glob
-from json import JSONDecodeError
 import logging
 import os
 import re
+from collections import OrderedDict
+from glob import glob
+from json import JSONDecodeError
+
+from dcm2bids import dcm2bids, Dcm2bids
+from dcm2bids.sidecar import Acquisition
 
 from datman.exceptions import MetadataException
 from datman.scanid import make_filename
 from datman.utils import (splitext, get_extension, write_json, read_json,
                           filter_niftis, read_blacklist, get_relative_source,
                           locate_metadata)
-
-from dcm2bids import dcm2bids, Dcm2bids
-from dcm2bids.sidecar import Acquisition
-
 from .base import SessionExporter
 
 logger = logging.getLogger(__name__)
@@ -594,123 +593,6 @@ def get_sidecars(self):
         contents = {path: read_json(path) for path in sidecars}
         return contents
 
-    def find_missing_scans(self):
-        """Find scans that exist on xnat but are missing from the bids folder.
-        """
-        class FakeSidecar(dcm2bids.Sidecar):
-            """Turns XNAT series descriptions into pseudo-sidecars.
-            """
-            def __init__(self, xnat_scan):
-                self.scan = xnat_scan
-                self.data = xnat_scan
-                self.compKeys = dcm2bids.DEFAULT.compKeys
-
-                # Placeholders for compatibility with dcm2bids.Sidecar
-                self.root = (
-                    f"/tmp/{xnat_scan.series}"
-                    + f"_{xnat_scan.description}"
-                    + f"_{xnat_scan.subject}"
-                )
-                self.filename = f"{self.root}.json"
-                self.data["SidecarFilename"] = self.filename
-
-            @property
-            def data(self):
-                return self._data
-
-            @data.setter
-            def data(self, scan):
-                self._data = OrderedDict()
-                self._data['SeriesDescription'] = scan.description
-                self._data['SeriesNumber'] = scan.series
-
-            def __repr__(self):
-                return f"<FakeSidecar {self.data['SeriesDescription']}>"
-
-        def get_expected_names(participant, sidecars, bids_conf):
-            parser = dcm2bids.SidecarPairing(
-                sidecars, bids_conf["descriptions"]
-            )
-            parser.build_graph()
-            parser.build_acquisitions(participant)
-            parser.find_runs()
-            return [acq.dstRoot for acq in parser.acquisitions]
-
-        def remove_criteria(descriptions):
-            trim_conf = []
-            for descr in bids_conf['descriptions']:
-                new_descr = descr.copy()
-                if len(descr['criteria']) > 1:
-                    new_descr['criteria'] = OrderedDict()
-                    new_descr['criteria']['SeriesDescription'] = descr[
-                        'criteria']['SeriesDescription']
-                trim_conf.append(new_descr)
-            return trim_conf
-
-        participant = dcm2bids.Participant(
-            self.bids_sub, session=self.bids_ses
-        )
-
-        bids_conf = dcm2bids.load_json(self.dcm2bids_config)
-
-        local_sidecars = []
-        for search_path in [self.output_dir, self.bids_tmp]:
-            for item in self.find_outputs(".json", start_dir=search_path):
-                sidecar = dcm2bids.Sidecar(item)
-                if ('Repeat' in sidecar.data and
-                        sidecar.data['Repeat'] != self.repeat):
-                    continue
-                local_sidecars.append(sidecar)
-        local_sidecars = sorted(local_sidecars)
-
-        xnat_sidecars = []
-        for scan in self.experiment.scans:
-            xnat_sidecars.append(FakeSidecar(scan))
-        xnat_sidecars = sorted(xnat_sidecars)
-
-        local_scans = get_expected_names(
-            participant, local_sidecars, bids_conf
-        )
-
-        # Use a more permissive bids_conf when finding xnat acqs
-        xnat_parser = dcm2bids.SidecarPairing(
-            xnat_sidecars, remove_criteria(bids_conf['descriptions'])
-        )
-        xnat_parser.build_graph()
-        xnat_parser.build_acquisitions(participant)
-        # Use this to find scans that have extra 'criteria' for single match
-        extra_acqs = []
-        for sidecar, descriptions in xnat_parser.graph.items():
-            if len(descriptions) > 1:
-                for descr in descriptions:
-                    acq = Acquisition(participant, srcSidecar=sidecar, **descr)
-                    extra_acqs.append(acq)
-        xnat_parser.acquisitions.extend(extra_acqs)
-        xnat_parser.find_runs()
-        xnat_scans = [acq.dstRoot for acq in xnat_parser.acquisitions]
-
-        missing_scans = []
-        for scan in xnat_scans:
-            if scan not in local_scans:
-                if "run-01" in scan:
-                    norun_scan = scan.replace("_run-01", "")
-                    if norun_scan not in local_scans:
-                        missing_scans.append(scan)
-                else:
-                    missing_scans.append(scan)
-
-        extra_scans = []
-        for scan in local_scans:
-            if scan not in xnat_scans:
-                if "run-01" in scan:
-                    norun_scan = scan.replace("_run-01", "")
-                    if norun_scan not in xnat_scans:
-                        extra_scans.append(scan)
-                else:
-                    extra_scans.append(scan)
-
-        return missing_scans, extra_scans
-
 
 class NiiLinkExporter(SessionExporter):
     """Populates a study's nii folder with symlinks pointing to the bids dir.
diff --git a/datman/exporters/dashboard.py b/datman/exporters/dashboard.py
index 86cdd290..5b5187ab 100644
--- a/datman/exporters/dashboard.py
+++ b/datman/exporters/dashboard.py
@@ -1,10 +1,9 @@
 """An exporter to push raw datman files into the QC dashboard.
 """
-from datetime import datetime
 import logging
 import os
+from datetime import datetime
 
-from .base import SessionExporter
 import datman.config
 import datman.dashboard
 from datman.exceptions import (ConfigException, DashboardException,
@@ -12,6 +11,7 @@
 from datman.scanid import (KCNIIdentifier, parse, parse_bids_filename,
                            ParseException)
 from datman.utils import find_tech_notes, get_extension
+from .base import SessionExporter
 
 logger = logging.getLogger(__name__)
 
diff --git a/datman/exporters/legacy.py b/datman/exporters/legacy.py
index 977e64f0..df2a7fd0 100644
--- a/datman/exporters/legacy.py
+++ b/datman/exporters/legacy.py
@@ -12,8 +12,8 @@
 
 import pydicom as dicom
 
-from .base import SeriesExporter
 from datman.utils import run, make_temp_directory, get_extension
+from .base import SeriesExporter
 
 logger = logging.getLogger(__name__)
 

From a3bd70225867c6844d9e61528aebe87774a90ce5 Mon Sep 17 00:00:00 2001
From: Dawn Smith <Dawn.Smith@camh.ca>
Date: Mon, 18 Aug 2025 20:42:56 -0400
Subject: [PATCH 45/45] [ENH] Refactor away the old NiiLinkExporter

---
 datman/exporters/base.py        |  32 ++-
 datman/exporters/bids.py        | 490 +-------------------------------
 datman/exporters/bids_legacy.py | 367 +-----------------------
 datman/exporters/nii_symlink.py | 468 ++++++++++++++++++++++++++++++
 4 files changed, 503 insertions(+), 854 deletions(-)
 create mode 100644 datman/exporters/nii_symlink.py

diff --git a/datman/exporters/base.py b/datman/exporters/base.py
index af8c4208..67544c26 100644
--- a/datman/exporters/base.py
+++ b/datman/exporters/base.py
@@ -5,14 +5,15 @@ class that inherits from either SessionExporter if it must work on an entire
 scan session at once, or a SeriesExporter if it works on a single individual
 scan series at a time.
 """
-
+import json
 import logging
 import os
 from abc import ABC, abstractmethod
+from pathlib import Path
 
 logger = logging.getLogger(__name__)
 
-__all__ = ["SeriesExporter", "SessionExporter"]
+__all__ = ["SeriesExporter", "SessionExporter", "read_sidecar"]
 
 
 class Exporter(ABC):
@@ -120,3 +121,30 @@ def __repr__(self):
         fq_name = str(self.__class__).replace("<class '", "").replace("'>", "")
         name = fq_name.rsplit(".", maxsplit=1)[-1]
         return f"<{name} - {self.fname_root}>"
+
+
+def read_sidecar(sidecar: str | Path) -> dict:
+    """Read the contents of a JSON sidecar file.
+
+    NOTE: This adds the path of the file itself under the key 'Path'
+    """
+    if not isinstance(sidecar, Path):
+        sidecar = Path(sidecar)
+
+    try:
+        contents = sidecar.read_text(encoding="utf-8")
+    except (UnicodeDecodeError, OSError) as e:
+        logger.debug(
+            f"Sidecar file is unreadable {sidecar} - {e}"
+        )
+        return {}
+
+    try:
+        data = json.loads(contents)
+    except (json.JSONDecodeError, TypeError) as e:
+        logger.debug(f"Invalid json sidecar {sidecar} - {e}")
+        return {}
+
+    data["Path"] = sidecar
+
+    return data
diff --git a/datman/exporters/bids.py b/datman/exporters/bids.py
index 0f0a7879..d9af464f 100644
--- a/datman/exporters/bids.py
+++ b/datman/exporters/bids.py
@@ -3,21 +3,17 @@
 import os
 import logging
 import json
-import re
 import dataclasses
-from glob import glob
 from pathlib import Path
 
 import datman.config
 from datman.exceptions import MetadataException
-from datman.utils import (locate_metadata, read_blacklist, get_relative_source,
-                          get_extension, write_json, run)
-from datman.scanid import make_filename
-from .base import SessionExporter
+from datman.utils import locate_metadata, write_json, run
+from .base import SessionExporter, read_sidecar
 
 logger = logging.getLogger(__name__)
 
-__all__ = ["BidsExporter", "NiiLinkExporter", "BidsOptions"]
+__all__ = ["BidsExporter", "BidsOptions"]
 
 
 @dataclasses.dataclass
@@ -249,483 +245,3 @@ def add_repeat_num(self):
             # Remove "Path" so it doesnt get written to the output file
             del contents["Path"]
             write_json(sidecar, contents)
-
-
-class NiiLinkExporter(SessionExporter):
-    """Populates a study's nii folder with symlinks pointing to the bids dir.
-    """
-
-    type = "nii_link"
-    ext = ".nii.gz"
-
-    def __init__(self, config, session, importer, **kwargs):
-        self.ident = session._ident
-        self.output_dir = session.nii_path
-        self.bids_path = session.bids_path
-        self.repeat = session.session
-        self.config = config
-        self.tags = config.get_tags(site=session.site)
-
-        super().__init__(config, session, importer, **kwargs)
-
-    @classmethod
-    def get_output_dir(cls, session):
-        return session.nii_path
-
-    def needs_raw_data(self):
-        return False
-
-    def outputs_exist(self):
-        sidecars = self.get_bids_sidecars()
-        name_map = self.make_dm_names(sidecars)
-
-        for dm_name in name_map:
-
-            if read_blacklist(scan=dm_name, config=self.config):
-                continue
-
-            full_path = os.path.join(self.output_dir, dm_name + self.ext)
-            if not os.path.exists(full_path):
-                return False
-
-        return True
-
-    def export(self, *args, **kwargs):
-        sidecars = self.get_bids_sidecars()
-        name_map = self.make_dm_names(sidecars)
-
-        if self.dry_run:
-            logger.info("Dry run: Skipping making nii folder links for "
-                        f"mapping {name_map}")
-            return
-
-        if self.outputs_exist():
-            return
-
-        self.make_output_dir()
-
-        for dm_name, bids_name in name_map.items():
-            self.link_scan(dm_name, bids_name)
-
-    def link_scan(self, dm_name: str, bids_root: Path | str):
-        """Create a symlink in the datman style that points to a bids file.
-
-        Args:
-            dm_name (:obj:`str`): A valid datman file name.
-            bids_root (:obj:`pathlib.Path`): The full path to a bids file
-                (without an extension).
-        """
-
-        if read_blacklist(scan=dm_name, config=self.config):
-            logger.debug(f"Ignoring blacklisted scan {dm_name}")
-            return
-
-        base_target = os.path.join(self.output_dir, dm_name)
-        for source in glob(str(bids_root) + "*"):
-            ext = get_extension(source)
-            target = base_target + ext
-
-            if is_broken_link(target):
-                remove_broken_link(target)
-
-            rel_source = get_relative_source(source, target)
-            make_link(rel_source, target)
-
-    def get_bids_sidecars(self) -> dict[int, list]:
-        """Get all sidecars from the session's BIDS folder.
-
-        Returns:
-            :obj:`dict`: A map from the series number to a list of the JSON
-                sidecar contents that result from that series.
-        """
-        sidecars = {}
-        bids_folder = Path(self.bids_path)
-        for sidecar in bids_folder.rglob("*.json"):
-
-            contents = read_sidecar(sidecar)
-            if not contents:
-                continue
-
-            if not self.matches_repeat(contents):
-                continue
-
-            if "SeriesNumber" not in contents:
-                logger.debug(
-                    "Ignoring malformed sidecar file (missing SeriesNumber): "
-                    f"{sidecar}"
-                )
-                continue
-
-            try:
-                series_num = int(contents["SeriesNumber"])
-            except ValueError:
-                logger.debug(
-                    f"Ignoring non-numeric series number in {sidecar}"
-                )
-                continue
-
-            sidecars.setdefault(series_num, []).append(contents)
-
-        self.fix_split_series_nums(sidecars)
-
-        return sidecars
-
-    def matches_repeat(self, sidecar: dict) -> bool:
-        """Check if a sidecar matches the current session's 'repeat'.
-
-        The 'repeat' number is used to track when a scan session was stopped
-        and restarted during a visit. Most of the time it will be '01'.
-        """
-        if "Repeat" not in sidecar:
-            # If this session is the first 'repeat' it's safe to assume an
-            # untagged sidecar belongs to it, since usually there's only one
-            # 'repeat' anyway.
-            return self.repeat == "01"
-        return sidecar["Repeat"] == self.repeat
-
-    def fix_split_series_nums(
-            self,
-            sidecars: dict[int, list]
-    ) -> dict[int, list]:
-        """Attempt to correct series nums that have been prefixed with '10'.
-
-        Some older versions of dcm2niix/dcm2bids liked to prefix half of a
-        split series' number with '10' rather than allowing all sidecars
-        to share the original series num. This attempts to identify when
-        that has happened and find the original series number for these
-        files.
-        """
-        all_series = [str(series).zfill(2) for series in sidecars]
-        must_delete = []
-
-        for series in sidecars:
-            str_series = str(series)
-
-            if not str_series.startswith("10"):
-                continue
-
-            if len(str_series) < 4:
-                continue
-
-            trimmed_series = str_series[2:]
-            if trimmed_series not in all_series:
-                # False alarm, probably not a mutated series number
-                continue
-
-            sidecars[int(trimmed_series)].extend(sidecars[series])
-            must_delete.append(series)
-
-        for series in must_delete:
-            del sidecars[series]
-
-        return sidecars
-
-    def make_dm_names(self, sidecars: dict[int, list]) -> dict[str, Path]:
-        """Create a datman-style name for each identifiable sidecar.
-
-        Args:
-            sidecars (`dict`): A dictionary mapping series numbers to a list
-                of bids sidecar files generated by that series.
-
-        Returns:
-            dict: a dictionary mapping a datman-style filename to the bids
-                sidecar path (minus extension) it belongs to.
-        """
-        found_names = {}
-        reqs = self.get_tag_requirements()
-        for series in sidecars:
-
-            temp_names = {}
-            for item in sidecars[series]:
-
-                found = self.find_tag(item, reqs)
-
-                if not found:
-                    logger.debug(f"No tag matches {item['Path']}, ignoring.")
-                    continue
-
-                if len(found) > 1:
-                    logger.debug(
-                        f"Multiple tags ({found}) match sidecar "
-                        f"{item['Path']}. Ignoring it. Please update "
-                        "configuration so at most one tag matches."
-                    )
-                    continue
-
-                dm_name = make_filename(
-                    self.ident,
-                    found[0],
-                    series,
-                    item["SeriesDescription"]
-                )
-
-                temp_names.setdefault(dm_name, []).append(item)
-
-            found_names = self.handle_duplicate_names(found_names, temp_names)
-
-        return found_names
-
-    def get_tag_requirements(self) -> dict[str, dict]:
-        """Read and reformat user configuration for all tags.
-
-        As described in datman's configuration documentation, at a minimum each
-        tag must define a 'SeriesDescription' regular expression. Tags
-        may optionally include a 'Bids' section, alongside datman's
-        'Pattern' and 'Count' fields for a tag to make it more restrictive or
-        accurate.
-
-        If included, the 'Bids' section should contain a list of sidecar field
-        names to check when determining if a tag can by applied. These must
-        match the sidecars fields verbatim (case-sensitive). Each field name
-        may then point to either:
-
-            - a literal string to be matched
-            - a dictionary of settings
-
-        The dictionary of settings may include the following keys:
-
-        - **Pattern** (`str` or list, optional): May be a literal string or a
-          regular expression in Python format (e.g., use `.*` not `*`), or a
-          list of literal strings. Optional if `Exclude` is given. If omitted
-          and `Exclude` is used, the presence of the field name alone
-          excludes a sidecar from taking the tag.
-        - **Regex** (`bool`, optional): Indicates whether `Pattern` is a regex
-          or a string literal. Default is `False`.
-        - **Exclude** (`bool`, optional): Indicates whether to exclude sidecars
-          that match the pattern (i.e., take the inverse). Default is `False`.
-
-        Examples:
-            Below are some YAML examples of commonly used configuration.
-
-            Prevent any sidecar with an 'IntendedFor' field from matching
-            a tag:
-
-                Bids:
-                    IntendedFor:
-                        Exclude: True
-
-            Match a sidecar only if the PhaseEncodingDirection is exactly 'j':
-
-                Bids:
-                    PhaseEncodingDirection: 'j'
-
-            Match a sidecar only if the ImageType contains 'DERIVED':
-
-                Bids:
-                    ImageType:
-                        Pattern: 'DERIVED'
-                        Regex: True
-
-        Returns:
-            A dictionary mapping each tag name to the requirements that
-                must be met for a tag to be applied to a BIDs sidecar.
-        """
-        reqs = {}
-        for tag in self.tags:
-
-            conf = self.tags.get(tag)
-
-            if is_malformed_conf(conf):
-                logger.error(
-                    f"Ignoring tag {tag} - Incorrectly configured. Each tag "
-                    "must contain a 'Pattern' section and each 'Pattern', at "
-                    "a minimum, must contain a 'SeriesDescription'. Consult "
-                    "the docs for more info.")
-                continue
-
-            regex = conf["Pattern"]["SeriesDescription"]
-            if isinstance(regex, list):
-                regex = "|".join(regex)
-
-            tag_reqs = {
-                "SeriesDescription": {
-                    "Pattern": regex,
-                    "Regex": True,
-                    "Exclude": False
-                }
-            }
-
-            bids_conf = conf.get("Bids", {})
-            for field in bids_conf:
-                # Ensure consistent formatting for settings
-                if isinstance(bids_conf[field], (str, int)):
-                    pattern = str(bids_conf[field])
-                    regex = False
-                    exclude = False
-                else:
-                    pattern = bids_conf[field].get("Pattern", "")
-                    if not isinstance(pattern, str):
-                        pattern = str(pattern)
-                    regex = bids_conf[field].get("Regex", False)
-                    exclude = bids_conf[field].get("Exclude", False)
-
-                tag_reqs[field] = {
-                    "Pattern": pattern,
-                    "Regex": regex,
-                    "Exclude": exclude
-                }
-
-            reqs[tag] = tag_reqs
-        return reqs
-
-    def find_tag(self,
-                 sidecar: dict,
-                 requirements: dict | None = None) -> list:
-        """Find which configured tags, if any, can be applied to a sidecar.
-
-        Args:
-            sidecar (`dict`): The contents of a json sidecar.
-            requirements (`dict`, optional): The requirements to match
-                each accepted tag. Default is 'None', in which case the
-                default datman configuration will be consulted.
-
-        Returns:
-            A list of tag names that the sidecar matches.
-        """
-        if not requirements:
-            requirements = self.get_tag_requirements()
-
-        found = []
-        for tag in requirements:
-
-            match = True
-            for field in requirements[tag]:
-                pattern = requirements[tag][field].get("Pattern", "")
-                is_regex = requirements[tag][field].get("Regex", False)
-                exclude = requirements[tag][field].get("Exclude", False)
-
-                if field not in sidecar:
-                    if not exclude:
-                        # Absence of an expected field fails tag match
-                        match = False
-                    continue
-
-                if exclude and not pattern:
-                    # Excluded field is in sidecar, so doesnt match tag
-                    match = False
-                    continue
-
-                actual = sidecar[field]
-                if not isinstance(actual, str):
-                    actual = str(actual)
-
-                if is_regex:
-                    comparator = re.search
-                else:
-                    comparator = re.fullmatch
-
-                if not comparator(pattern, actual, re.IGNORECASE):
-                    match = False
-                elif exclude:
-                    # Tag does match, but settings indicate to take inverse
-                    match = False
-            if match:
-                found.append(tag)
-
-        return found
-
-    def handle_duplicate_names(
-        self,
-        existing_names: dict[str, str],
-        new_entries: dict[str, dict]
-    ) -> dict[str, str]:
-        """Make duplicated names unique.
-
-        Sometimes, as with multi-echo scans, multiple BIDs files will create
-        the same datman name. This ensures a unique name exists for each.
-
-        Args:
-            existing_names (`dict`): The dictionary to add the fixed name
-                entries to.
-            new_entries (`dict`): New entries that may contain duplicated
-                datman-style names.
-
-        Returns:
-            dict[str, str]: The existing_names dictionary with all
-                new entries merged in with unique names.
-        """
-        for name in new_entries:
-
-            if len(new_entries[name]) == 1:
-                existing_names[name] = remove_extension(
-                    new_entries[name][0]["Path"]
-                )
-                continue
-
-            for sidecar in new_entries[name]:
-                if "EchoNumber" not in sidecar:
-                    logger.error(
-                        "Multiple BIDs files result in same file name "
-                        f"'{name}'. Please update configuration to help "
-                        f"identify file: {sidecar['Path']}"
-                    )
-                    continue
-                new_name = name + f"_ECHO-{sidecar['EchoNumber']}"
-                existing_names[new_name] = remove_extension(sidecar['Path'])
-
-        return existing_names
-
-
-def is_malformed_conf(config: dict) -> bool:
-    """Check if a tag's configuration is unusably malformed.
-    """
-    if "Pattern" not in config:
-        return True
-    if "SeriesDescription" not in config["Pattern"]:
-        return True
-    return False
-
-
-def remove_extension(path: Path) -> Path:
-    """Remove all extensions from a path.
-    """
-    while path.suffix:
-        path = path.with_suffix("")
-    return path
-
-
-def is_broken_link(symlink: str) -> bool:
-    return os.path.islink(symlink) and not os.path.exists(symlink)
-
-
-def remove_broken_link(target: str):
-    try:
-        os.unlink(target)
-    except OSError as e:
-        logger.error(f"Failed to remove broken symlink {target} - {e}")
-
-
-def make_link(source: str, target: str):
-    try:
-        os.symlink(source, target)
-    except FileExistsError:
-        pass
-    except OSError as e:
-        logger.error(f"Failed to create {target} - {e}")
-
-
-def read_sidecar(sidecar: str | Path) -> dict:
-    """Read the contents of a JSON sidecar file.
-
-    NOTE: This adds the path of the file itself under the key 'Path'
-    """
-    if not isinstance(sidecar, Path):
-        sidecar = Path(sidecar)
-
-    try:
-        contents = sidecar.read_text(encoding="utf-8")
-    except (UnicodeDecodeError, OSError) as e:
-        logger.debug(
-            f"Sidecar file is unreadable {sidecar} - {e}"
-        )
-        return {}
-
-    try:
-        data = json.loads(contents)
-    except (json.JSONDecodeError, TypeError) as e:
-        logger.debug(f"Invalid json sidecar {sidecar} - {e}")
-        return {}
-
-    data["Path"] = sidecar
-
-    return data
diff --git a/datman/exporters/bids_legacy.py b/datman/exporters/bids_legacy.py
index 283a24ae..06026987 100644
--- a/datman/exporters/bids_legacy.py
+++ b/datman/exporters/bids_legacy.py
@@ -21,15 +21,12 @@
 from dcm2bids.sidecar import Acquisition
 
 from datman.exceptions import MetadataException
-from datman.scanid import make_filename
-from datman.utils import (splitext, get_extension, write_json, read_json,
-                          filter_niftis, read_blacklist, get_relative_source,
-                          locate_metadata)
+from datman.utils import (splitext, write_json, read_json, locate_metadata)
 from .base import SessionExporter
 
 logger = logging.getLogger(__name__)
 
-__all__ = ["BidsExporter", "NiiLinkExporter", "BidsOptions"]
+__all__ = ["BidsExporter", "BidsOptions"]
 
 
 class BidsOptions:
@@ -594,356 +591,6 @@ def get_sidecars(self):
         return contents
 
 
-class NiiLinkExporter(SessionExporter):
-    """Populates a study's nii folder with symlinks pointing to the bids dir.
-    """
-
-    type = "nii_link"
-    ext = ".nii.gz"
-
-    def __init__(self, config, session, experiment, **kwargs):
-        self.ident = session._ident
-        self.output_dir = session.nii_path
-        self.bids_path = session.bids_path
-        self.config = config
-        self.tags = config.get_tags(site=session.site)
-
-        super().__init__(config, session, experiment, **kwargs)
-
-        self.dm_names = self.get_dm_names()
-        self.bids_names = self.get_bids_niftis()
-        self.name_map = self.match_dm_to_bids(self.dm_names, self.bids_names)
-
-    def get_dm_names(self):
-        """Get the datman-style scan names for an entire XNAT experiment.
-
-        Returns:
-            :obj:`list`: A list of datman-style names for all scans found
-                for the session on XNAT.
-        """
-        names = []
-        for scan in self.experiment.scans:
-            names.extend(scan.names)
-        return names
-
-    def get_bids_niftis(self):
-        """Get all nifti files from a BIDS session.
-
-        Returns:
-            :obj:`list`: A list of full paths (minus the file extension) to
-                each bids format nifti file in the session.
-        """
-        bids_niftis = []
-        for path, _, files in os.walk(self.bids_path):
-            niftis = filter_niftis(files)
-            for item in niftis:
-                basename = item.replace(get_extension(item), "")
-                nii_path = os.path.join(path, basename)
-                if self.belongs_to_session(nii_path):
-                    bids_niftis.append(nii_path)
-        return bids_niftis
-
-    def belongs_to_session(self, nifti_path):
-        """Check if a nifti belongs to this repeat or another for this session.
-
-        Args:
-            nifti_path (str): A nifti file name from the bids folder (minus
-                extension).
-
-        Returns:
-            bool: True if the nifti file belongs to this particular
-                repeat. False if it belongs to another repeat.
-        """
-        try:
-            side_car = read_json(nifti_path + ".json")
-        except FileNotFoundError:
-            # Assume it belongs if a side car cant be read.
-            return True
-
-        repeat = side_car.get("Repeat")
-        if not repeat:
-            # No repeat is recorded in the json, assume its for this session.
-            return True
-
-        return repeat == self.ident.session
-
-    def match_dm_to_bids(self, dm_names, bids_names):
-        """Match each datman file name to its BIDS equivalent.
-
-        Args:
-            dm_names (:obj:`list`): A list of all valid datman scan names found
-                for this session on XNAT.
-            bids_names (:obj:`list`): A list of all bids files (minus
-                extensions) that exist for this session.
-
-        Returns:
-            :obj:`dict`: A dictionary matching the intended datman file name to
-                the full path (minus extension) of the same series in the bids
-                folder. If no matching bids file was found, it will instead be
-                matched to the string 'missing'.
-        """
-        name_map = {}
-        for tag in self.tags:
-            try:
-                bids_conf = self.tags.get(tag)['Bids']
-            except KeyError:
-                logger.info(f"No bids config found for tag {tag}. Can't match "
-                            "bids outputs to a datman-style name.")
-                continue
-
-            matches = self._find_matching_files(bids_names, bids_conf)
-
-            for item in matches:
-                try:
-                    dm_name = self.make_datman_name(item, tag)
-                except Exception as e:
-                    logger.error(
-                        f"Failed to assign datman style name to {item}. "
-                        f"Reason - {e}")
-                    continue
-                name_map[dm_name] = item
-
-        for scan in dm_names:
-            output_file = os.path.join(self.output_dir, scan + self.ext)
-            if scan not in name_map and not os.path.exists(output_file):
-                # An expected scan is missing from the bids folder and
-                # hasnt already been exported directly with dcm2niix
-                name_map[scan] = "missing"
-
-        return name_map
-
-    def make_datman_name(self, bids_path, scan_tag):
-        """Create a Datman-style file name for a bids file.
-
-        Args:
-            bids_path (str): The full path (+/- extension) of a bids file to
-                create a datman name for.
-            scan_tag (str): A datman style tag to apply to the bids scan.
-
-        Returns:
-            str: A valid datman style file name (minus extension).
-        """
-        side_car = read_json(bids_path + ".json")
-        description = side_car['SeriesDescription']
-        num = self.get_series_num(side_car)
-
-        dm_name = make_filename(self.ident, scan_tag, num, description)
-        return dm_name
-
-    def get_series_num(self, side_car):
-        """Find the correct series number for a scan.
-
-        Most JSON side car files have the correct series number already.
-        However, series that are split during nifti conversion (e.g.
-        FMAP-AP/-PA) end up with one of the two JSON files having a modified
-        series number. This function will default to the XNAT series number
-        whenever possible, for accuracy.
-
-        Args:
-            side_car (:obj:`dict`): A dictionary containing the contents of a
-                scan's JSON side car file.
-
-        Returns:
-            str: The most accurate series number found for the scan.
-        """
-        description = side_car['SeriesDescription']
-        num = str(side_car['SeriesNumber'])
-        xnat_scans = [item for item in self.experiment.scans
-                      if item.description == description]
-
-        if not xnat_scans:
-            return num
-
-        if len(xnat_scans) == 1:
-            return xnat_scans[0].series
-
-        # Catch split series (dcm2bids adds 1000 to the series number of
-        # one of the two files)
-        split_num = str(int(num) - 1000).zfill(2)
-        if any([split_num == str(item.series).zfill(2)
-                for item in xnat_scans]):
-            return split_num
-
-        return num
-
-    def _find_matching_files(self, bids_names, bids_conf):
-        """Search a list of bids files to find series that match a datman tag.
-
-        Args:
-            bids_names (:obj:`list`): A list of bids file names to search
-                through.
-            bids_conf (:obj:`dict`): The bids configuration for a single tag
-                from datman's configuration files.
-
-        Returns:
-            :obj:`list`: A list of full paths (minus extension) of bids files
-                that match the tag configuration. If none match, an empty
-                list will be returned.
-        """
-        matches = self._filter_bids(
-            bids_names, bids_conf.get('class'), par_dir=True)
-        matches = self._filter_bids(
-            matches, bids_conf.get(self._get_label_key(bids_conf)))
-        matches = self._filter_bids(matches, bids_conf.get('task'))
-        matches = self._filter_bids(matches, bids_conf.get('dir'))
-        # The below is used to more accurately match FMAP tags
-        matches = self._filter_bids(matches, bids_conf.get('match_acq'))
-        return matches
-
-    def _filter_bids(self, niftis, search_term, par_dir=False):
-        """Find the subset of file names that matches a search string.
-
-        Args:
-            niftis (:obj:`list`): A list of nifti file names to search through.
-            search_term (:obj:`str`): The search term nifti files must match.
-            par_dir (bool, optional): Restricts the search to the nifti file's
-                parent directory, if full paths were given.
-
-        Returns:
-            list: A list of all files that match the search term.
-        """
-        if not search_term:
-            return niftis.copy()
-
-        if not isinstance(search_term, list):
-            search_term = [search_term]
-
-        result = set()
-        for item in niftis:
-            if par_dir:
-                fname = os.path.split(os.path.dirname(item))[1]
-            else:
-                fname = os.path.basename(item)
-
-            for term in search_term:
-                if term in fname:
-                    result.add(item)
-        return list(result)
-
-    def _get_label_key(self, bids_conf):
-        """Return the name for the configuration's label field.
-        """
-        for key in bids_conf:
-            if 'label' in key:
-                return key
-        return ""
-
-    @classmethod
-    def get_output_dir(cls, session):
-        return session.nii_path
-
-    def get_error_file(self, dm_file):
-        return os.path.join(self.output_dir, dm_file + ".err")
-
-    def outputs_exist(self):
-        for dm_name in self.name_map:
-            if read_blacklist(scan=dm_name, config=self.config):
-                continue
-
-            if self.name_map[dm_name] == "missing":
-                if not os.path.exists(self.get_error_file(dm_name)):
-                    return False
-                continue
-
-            full_path = os.path.join(self.output_dir, dm_name + self.ext)
-            if not os.path.exists(full_path):
-                return False
-        return True
-
-    def needs_raw_data(self):
-        return False
-
-    def export(self, *args, **kwargs):
-        # Re run this before exporting, in case new BIDS files exist.
-        self.bids_names = self.get_bids_niftis()
-        self.name_map = self.match_dm_to_bids(self.dm_names, self.bids_names)
-
-        if self.dry_run:
-            logger.info("Dry run: Skipping making nii folder links for "
-                        f"mapping {self.name_map}")
-            return
-
-        if self.outputs_exist():
-            return
-
-        self.make_output_dir()
-        for dm_name, bids_name in self.name_map.items():
-            if bids_name == "missing":
-                self.report_errors(dm_name)
-            else:
-                self.make_link(dm_name, bids_name)
-                # Run in case of previous errors
-                self.clear_errors(dm_name)
-
-    def report_errors(self, dm_file):
-        """Create an error file to report probable BIDS conversion issues.
-
-        Args:
-            dm_file (:obj:`str`): A valid datman file name.
-        """
-        err_file = self.get_error_file(dm_file)
-        contents = (
-            f"{dm_file} could not be made. This may be due to a dcm2bids "
-            "conversion error or an issue with downloading the raw dicoms. "
-            "Please contact an admin as soon as possible.\n"
-        )
-        try:
-            with open(err_file, "w") as fh:
-                fh.write(contents)
-        except Exception as e:
-            logger.error(
-                f"Failed to write error file for {dm_file}. Reason - {e}"
-            )
-
-    def clear_errors(self, dm_file):
-        """Remove an error file from a previous BIDs export issue.
-
-        Args:
-            dm_file (:obj:`str`): A valid datman file name.
-        """
-        err_file = self.get_error_file(dm_file)
-        try:
-            os.remove(err_file)
-        except FileNotFoundError:
-            pass
-        except Exception as e:
-            logger.error(f"Failed while removing {err_file}. Reason - {e}")
-
-    def make_link(self, dm_file, bids_file):
-        """Create a symlink in the datman style that points to a bids file.
-
-        Args:
-            dm_file (:obj:`str`): A valid datman file name.
-            bids_file (:obj:`str`): The full path to a bids file (minus
-                extension.)
-        """
-        base_target = os.path.join(self.output_dir, dm_file)
-        if read_blacklist(scan=base_target, config=self.config):
-            logger.debug(f"Ignoring blacklisted scan {dm_file}")
-            return
-
-        for source in glob(bids_file + '*'):
-            ext = get_extension(source)
-            target = base_target + ext
-
-            if os.path.islink(target) and not os.path.exists(target):
-                # Remove a broken symlink
-                try:
-                    os.unlink(target)
-                except Exception as exc:
-                    logger.error(
-                        f"Failed to remove broken symlink {target} - {exc}")
-                    continue
-
-            rel_source = get_relative_source(source, target)
-            try:
-                os.symlink(rel_source, target)
-            except FileExistsError:
-                pass
-            except Exception as exc:
-                logger.error(f"Failed to create {target}. Reason - {exc}")
-
-
 class FakeSidecar(dcm2bids.Sidecar):
     """Turns XNAT series descriptions into pseudo-sidecars.
     """
@@ -975,16 +622,6 @@ def __repr__(self):
         return f"<FakeSidecar {self.data['SeriesDescription']}>"
 
 
-def get_expected_names(participant, sidecars, bids_conf):
-    parser = dcm2bids.SidecarPairing(
-        sidecars, bids_conf["descriptions"]
-    )
-    parser.build_graph()
-    parser.build_acquisitions(participant)
-    parser.find_runs()
-    return [acq.dstRoot for acq in parser.acquisitions]
-
-
 def remove_criteria(descriptions):
     trim_conf = []
     for descr in descriptions:
diff --git a/datman/exporters/nii_symlink.py b/datman/exporters/nii_symlink.py
new file mode 100644
index 00000000..50e62a28
--- /dev/null
+++ b/datman/exporters/nii_symlink.py
@@ -0,0 +1,468 @@
+"""Populate the 'nii' folder with symlinks to the bids folder.
+"""
+import logging
+import os
+import re
+from glob import glob
+from pathlib import Path
+
+from datman.scanid import make_filename
+from datman.utils import (read_blacklist, get_relative_source, get_extension)
+from .base import SessionExporter, read_sidecar
+
+logger = logging.getLogger(__name__)
+
+__all__ = ["NiiLinkExporter"]
+
+
+class NiiLinkExporter(SessionExporter):
+    """Populates a study's nii folder with symlinks pointing to the bids dir.
+    """
+
+    type = "nii_link"
+    ext = ".nii.gz"
+
+    def __init__(self, config, session, importer, **kwargs):
+        """Record session paths, repeat and site tags, then init the base."""
+        self.ident = session._ident
+        self.output_dir = session.nii_path
+        self.bids_path = session.bids_path
+        self.repeat = session.session
+        self.config = config
+        self.tags = config.get_tags(site=session.site)
+        super().__init__(config, session, importer, **kwargs)
+
+    @classmethod
+    def get_output_dir(cls, session):
+        return session.nii_path  # same folder __init__ uses as output_dir
+
+    def needs_raw_data(self):
+        return False  # export() works from sidecars already in bids_path
+
+    def outputs_exist(self):
+        """Check that each expected, non-blacklisted nii link is present.
+
+        Returns:
+            bool: False if any expected file is missing from the output
+                dir. True otherwise, including when nothing is expected.
+        """
+        expected = self.make_dm_names(self.get_bids_sidecars())
+        # Blacklisted scans are never linked, so they are not required.
+        return all(
+            os.path.exists(os.path.join(self.output_dir, name + self.ext))
+            for name in expected
+            if not read_blacklist(scan=name, config=self.config)
+        )
+
+    def export(self, *args, **kwargs):
+        """Link every nameable bids file into the nii folder.
+
+        A dry run only logs the planned name mapping. Otherwise links are
+        made unless outputs_exist() reports that all of them are already
+        in place.
+        """
+        name_map = self.make_dm_names(self.get_bids_sidecars())
+
+        if self.dry_run:
+            logger.info("Dry run: Skipping making nii folder links for "
+                        f"mapping {name_map}")
+        elif not self.outputs_exist():
+            self.make_output_dir()
+            for dm_name, bids_root in name_map.items():
+                self.link_scan(dm_name, bids_root)
+
+    def link_scan(self, dm_name: str, bids_root: Path | str):
+        """Link every file sharing a bids root under its datman name.
+
+        Each file matching ``bids_root*`` is linked into the output dir
+        as ``dm_name`` plus that file's extension. A broken link already
+        at the target is removed first. Blacklisted scans are skipped.
+
+        Args:
+            dm_name (:obj:`str`): A valid datman file name.
+            bids_root (:obj:`pathlib.Path`): The full path to a bids file
+                (without an extension).
+        """
+        if read_blacklist(scan=dm_name, config=self.config):
+            logger.debug(f"Ignoring blacklisted scan {dm_name}")
+            return
+
+        link_prefix = os.path.join(self.output_dir, dm_name)
+        for bids_file in glob(str(bids_root) + "*"):
+            link = link_prefix + get_extension(bids_file)
+            if is_broken_link(link):
+                remove_broken_link(link)
+
+            make_link(get_relative_source(bids_file, link), link)
+
+    def get_bids_sidecars(self) -> dict[int, list]:
+        """Get all sidecars from the session's BIDS folder.
+
+        Sidecars are skipped if they have no contents, belong to another
+        repeat, or lack a 'SeriesNumber' that int() can convert.
+
+        Returns:
+            :obj:`dict`: A map from the series number to a list of the JSON
+                sidecar contents that result from that series.
+        """
+        sidecars = {}
+        bids_folder = Path(self.bids_path)
+        for sidecar in bids_folder.rglob("*.json"):
+            contents = read_sidecar(sidecar)
+            if not contents:
+                continue
+            if not self.matches_repeat(contents):
+                continue
+
+            if "SeriesNumber" not in contents:
+                logger.debug(
+                    "Ignoring malformed sidecar file (missing SeriesNumber): "
+                    f"{sidecar}"
+                )
+                continue
+
+            try:
+                series_num = int(contents["SeriesNumber"])
+            except (ValueError, TypeError):
+                # TypeError: value is null, a list, etc. rather than text
+                logger.debug(
+                    f"Ignoring non-numeric series number in {sidecar}"
+                )
+                continue
+
+            sidecars.setdefault(series_num, []).append(contents)
+        self.fix_split_series_nums(sidecars)
+        return sidecars
+
+    def matches_repeat(self, sidecar: dict) -> bool:
+        """Decide whether a sidecar belongs to this session's 'repeat'.
+
+        A 'repeat' tracks a scan session that was stopped and restarted
+        within one visit; most sessions only ever have repeat '01'.
+
+        Sidecars with no 'Repeat' field are treated as belonging to repeat
+        '01', since usually that is the only repeat that exists.
+        """
+        # Untagged sidecars default to the first repeat.
+        found_repeat = sidecar.get("Repeat", "01")
+        return found_repeat == self.repeat
+
+    def fix_split_series_nums(
+            self,
+            sidecars: dict[int, list]
+    ) -> dict[int, list]:
+        """Attempt to correct series nums that have been prefixed with '10'.
+
+        Some older versions of dcm2niix/dcm2bids liked to prefix half of a
+        split series' number with '10' rather than allowing all sidecars
+        to share the original series num. A series number is treated as
+        mutated when it has at least four digits, starts with '10', and
+        the remaining digits match another series number that is present.
+
+        Args:
+            sidecars (`dict`): Maps series numbers to lists of sidecar
+                contents. Modified in place.
+
+        Returns:
+            dict: The same dictionary, with each mutated entry merged
+                into its original series and then removed.
+        """
+        # Zero padded so a trimmed '05' can be matched to series 5.
+        known = {str(num).zfill(2) for num in sidecars}
+        merged = []
+
+        for num in sidecars:
+            digits = str(num)
+            if len(digits) < 4 or not digits.startswith("10"):
+                continue
+            original = digits[2:]
+            if original in known:
+                sidecars[int(original)].extend(sidecars[num])
+                merged.append(num)
+
+        for num in merged:
+            del sidecars[num]
+        return sidecars
+
+    def make_dm_names(self, sidecars: dict[int, list]) -> dict[str, Path]:
+        """Create a datman-style name for each identifiable sidecar.
+
+        A sidecar is identifiable when exactly one configured tag matches
+        it. Sidecars matching no tag, or several, are logged and skipped.
+
+        Args:
+            sidecars (`dict`): A dictionary mapping series numbers to a list
+                of bids sidecar files generated by that series.
+
+        Returns:
+            dict: a dictionary mapping a datman-style filename to the bids
+                sidecar path (minus extension) it belongs to.
+        """
+        reqs = self.get_tag_requirements()
+        found_names = {}
+
+        for series, items in sidecars.items():
+            # Names are grouped per series so duplicates can be resolved
+            # before they are merged into the final result.
+            temp_names = {}
+
+            for item in items:
+                tags = self.find_tag(item, reqs)
+
+                if len(tags) > 1:
+                    logger.debug(
+                        f"Multiple tags ({tags}) match sidecar "
+                        f"{item['Path']}. Ignoring it. Please update "
+                        "configuration so at most one tag matches."
+                    )
+                    continue
+
+                if not tags:
+                    logger.debug(f"No tag matches {item['Path']}, ignoring.")
+                    continue
+
+                dm_name = make_filename(
+                    self.ident, tags[0], series, item["SeriesDescription"])
+                temp_names.setdefault(dm_name, []).append(item)
+
+            found_names = self.handle_duplicate_names(found_names, temp_names)
+
+        return found_names
+
+    def get_tag_requirements(self) -> dict[str, dict]:
+        """Read and reformat user configuration for all tags.
+
+        As described in datman's configuration documentation, at a minimum each
+        tag must define a 'SeriesDescription' regular expression. Tags
+        may optionally include a 'Bids' section, alongside datman's
+        'Pattern' and 'Count' fields for a tag to make it more restrictive or
+        accurate.
+
+        If included, the 'Bids' section should contain a list of sidecar field
+        names to check when determining if a tag can be applied. These must
+        match the sidecar's fields verbatim (case-sensitive). Each field name
+        may then point to either:
+
+            - a literal string (or number) to be matched
+            - a dictionary of settings
+
+        The dictionary of settings may include the following keys:
+
+        - **Pattern** (`str` or list, optional): May be a literal string or a
+          regular expression in Python format (e.g., use `.*` not `*`). Any
+          other value (e.g. a list) is converted with `str()`. Optional if
+          `Exclude` is given, in which case the presence of the field name
+          alone excludes a sidecar from taking the tag.
+        - **Regex** (`bool`, optional): Indicates whether `Pattern` is a regex
+          or a string literal. Default is `False`.
+        - **Exclude** (`bool`, optional): Indicates whether to exclude sidecars
+          that match the pattern (i.e., take the inverse). Default is `False`.
+
+        Examples:
+            Below are some YAML examples of commonly used configuration.
+
+            Prevent any sidecar with an 'IntendedFor' field from matching
+            a tag:
+
+                Bids:
+                    IntendedFor:
+                        Exclude: True
+
+            Match a sidecar only if the PhaseEncodingDirection is exactly 'j':
+
+                Bids:
+                    PhaseEncodingDirection: 'j'
+
+            Match a sidecar only if the ImageType contains 'DERIVED':
+
+                Bids:
+                    ImageType:
+                        Pattern: 'DERIVED'
+                        Regex: True
+
+        Returns:
+            A dictionary mapping each tag name to the requirements that
+                must be met for a tag to be applied to a BIDs sidecar.
+        """
+        reqs = {}
+        for tag in self.tags:
+            conf = self.tags.get(tag)
+
+            if is_malformed_conf(conf):
+                logger.error(
+                    f"Ignoring tag {tag} - Incorrectly configured. Each tag "
+                    "must contain a 'Pattern' section and each 'Pattern', at "
+                    "a minimum, must contain a 'SeriesDescription'. Consult "
+                    "the docs for more info.")
+                continue
+
+            descr_regex = conf["Pattern"]["SeriesDescription"]
+            if isinstance(descr_regex, list):
+                descr_regex = "|".join(descr_regex)
+
+            tag_reqs = {
+                "SeriesDescription": {
+                    "Pattern": descr_regex,
+                    "Regex": True,
+                    "Exclude": False
+                }
+            }
+
+            # An empty 'Bids:' section may be read as None; treat as absent
+            bids_conf = conf.get("Bids") or {}
+            for field in bids_conf:
+                # Ensure consistent formatting for settings
+                if isinstance(bids_conf[field], (str, int, float)):
+                    pattern = str(bids_conf[field])
+                    is_regex = False
+                    exclude = False
+                else:
+                    pattern = bids_conf[field].get("Pattern", "")
+                    if not isinstance(pattern, str):
+                        pattern = str(pattern)
+                    is_regex = bids_conf[field].get("Regex", False)
+                    exclude = bids_conf[field].get("Exclude", False)
+
+                tag_reqs[field] = {
+                    "Pattern": pattern,
+                    "Regex": is_regex,
+                    "Exclude": exclude
+                }
+
+            reqs[tag] = tag_reqs
+        return reqs
+
+    def find_tag(self,
+                 sidecar: dict,
+                 requirements: dict | None = None) -> list:
+        """Find which configured tags, if any, can be applied to a sidecar.
+
+        A tag applies only if every field it lists is satisfied. Literal
+        patterns must equal the whole value, regex patterns may match
+        anywhere; both ignore case. 'Exclude' inverts the outcome.
+
+        Args:
+            sidecar (`dict`): The contents of a json sidecar.
+            requirements (`dict`, optional): The requirements to match
+                each accepted tag. Default is 'None', in which case the
+                default datman configuration will be consulted.
+
+        Returns:
+            A list of tag names that the sidecar matches.
+        """
+        if not requirements:
+            requirements = self.get_tag_requirements()
+
+        found = []
+        for tag in requirements:
+            match = True
+            for field in requirements[tag]:
+                pattern = requirements[tag][field].get("Pattern", "")
+                is_regex = requirements[tag][field].get("Regex", False)
+                exclude = requirements[tag][field].get("Exclude", False)
+                if field not in sidecar:
+                    if not exclude:
+                        # Absence of an expected field fails tag match
+                        match = False
+                    continue
+                if exclude and not pattern:
+                    # Excluded field is in sidecar, so doesn't match tag
+                    match = False
+                    continue
+
+                actual = sidecar[field]
+                if not isinstance(actual, str):
+                    actual = str(actual)
+
+                if not is_regex:
+                    # Literals must not be interpreted as regex syntax
+                    pattern = re.escape(pattern)
+                comparator = re.search if is_regex else re.fullmatch
+                hit = comparator(pattern, actual, re.IGNORECASE)
+
+                # A hit is required normally, and forbidden if excluding
+                if bool(hit) == bool(exclude):
+                    match = False
+            if match:
+                found.append(tag)
+
+        return found
+
+    def handle_duplicate_names(
+        self,
+        existing_names: dict[str, str],
+        new_entries: dict[str, dict]
+    ) -> dict[str, str]:
+        """Make duplicated names unique.
+
+        Sometimes, as with multi-echo scans, multiple BIDs files will create
+        the same datman name. Such files get an '_ECHO-<EchoNumber>' suffix;
+        any that lack an 'EchoNumber' are reported and left out.
+
+        Args:
+            existing_names (`dict`): The dictionary to add the fixed name
+                entries to. Modified in place.
+            new_entries (`dict`): Maps each new datman-style name to the
+                list of sidecars that produced it.
+
+        Returns:
+            dict: existing_names, with every uniquely named entry merged in.
+        """
+        for name, sidecars in new_entries.items():
+
+            if len(sidecars) == 1:
+                existing_names[name] = remove_extension(sidecars[0]["Path"])
+                continue
+
+            for sidecar in sidecars:
+                if "EchoNumber" in sidecar:
+                    echo_name = name + f"_ECHO-{sidecar['EchoNumber']}"
+                    existing_names[echo_name] = remove_extension(
+                        sidecar["Path"]
+                    )
+                    continue
+                logger.error(
+                    "Multiple BIDs files result in same file name "
+                    f"'{name}'. Please update configuration to help "
+                    f"identify file: {sidecar['Path']}"
+                )
+
+        return existing_names
+
+
+def is_malformed_conf(config: dict) -> bool:
+    """Return True unless config has a 'Pattern' with a 'SeriesDescription'.
+    """
+    try:
+        return "SeriesDescription" not in config["Pattern"]
+    except (KeyError, TypeError):
+        # Missing 'Pattern', or a None/non-container value: unusable.
+        return True
+
+
+def remove_extension(path: Path) -> Path:
+    """Strip every trailing suffix (e.g. '.nii.gz') from a path."""
+    stripped = path
+    while stripped.suffix != "":
+        stripped = stripped.with_suffix("")
+    return stripped
+
+
+def is_broken_link(symlink: str) -> bool:
+    return os.path.islink(symlink) and not os.path.exists(symlink)  # dangling
+
+
+def remove_broken_link(target: str):
+    try:
+        os.unlink(target)
+    except OSError as e:  # best effort: failure is logged, not raised
+        logger.error(f"Failed to remove broken symlink {target} - {e}")
+
+
+def make_link(source: str, target: str):
+    """Symlink target -> source; an existing target is silently kept."""
+    try:
+        os.symlink(source, target)
+    except OSError as err:
+        if not isinstance(err, FileExistsError):
+            logger.error(f"Failed to create {target} - {err}")