From 8ea22552b07b5177ea3c5006fc340d340a96b601 Mon Sep 17 00:00:00 2001
From: Isaac To <isaac.chun.to@gmail.com>
Date: Sun, 9 Feb 2025 16:18:19 -0800
Subject: [PATCH 1/9] feat: Add API management command for correcting corrupt
 metadata

---
 .../management/commands/correct_metadata.py   | 94 +++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 dandiapi/api/management/commands/correct_metadata.py

diff --git a/dandiapi/api/management/commands/correct_metadata.py b/dandiapi/api/management/commands/correct_metadata.py
new file mode 100644
index 000000000..269828290
--- /dev/null
+++ b/dandiapi/api/management/commands/correct_metadata.py
@@ -0,0 +1,94 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+from django.db import transaction
+import djclick as click
+
+from dandiapi.api.models import Version
+from dandiapi.api.tasks import write_manifest_files
+
+
+@click.command(
+    help='Correct corrupted metadata. If `--dandiset` and `--dandiset-version` are provided, '
+    'apply the correction to the specified Dandiset version. Otherwise, apply the correction to '
+    'all Dandiset versions.'
+)
+@click.option(
+    '--dandiset',
+    'dandiset',
+    help='The Dandiset. (This must be provided if `--dandiset-version` is provided.)',
+)
+@click.option(
+    '--dandiset-version',
+    'dandiset_version',
+    help='The version of the Dandiset. (This must be provided if `--dandiset` is provided.)',
+)
+def correct_metadata(*, dandiset: str | None, dandiset_version: str | None):
+    # Func to use to correct the metadata (update as needed)
+    correct_func: Callable[[dict], dict | None] = correct_affiliation_corruption
+
+    # Ensure both options are provided together (or both omitted)
+    if (dandiset is None) != (dandiset_version is None):
+        raise click.UsageError(
+            'Both `--dandiset` and `--dandiset-version` must be provided together.'
+        )
+
+    vers = (
+        Version.objects.all()
+        if dandiset is None
+        else (Version.objects.get(dandiset=dandiset, version=dandiset_version),)
+    )
+
+    # List of changes to be applied to the database represented as tuples of the form
+    # version to be changed and the new metadata
+    changes: list[tuple[Version, dict]] = []
+    for ver in vers:
+        try:
+            meta_corrected = correct_func(ver.metadata)
+        except Exception as e:
+            click.echo(
+                f'{ver.dandiset.identifier}/{ver.version}: Failed to correct metadata with '
+                f'`{correct_func.__name__}()`'
+            )
+            click.echo(e)
+            raise click.Abort from e
+
+        if meta_corrected is not None:
+            changes.append((ver, meta_corrected))
+
+    if changes:
+        # Apply the changes to the database atomically
+        with transaction.atomic():
+            for ver, meta_corrected in changes:
+                ver.metadata = meta_corrected
+                ver.save()  # TODO: should I use `save()` in this case? It has some custom logic.
+
+        for ver, _ in changes:
+            write_manifest_files.delay(ver.id)  # TODO: Please check if this is needed
+
+            click.echo(
+                f'{ver.dandiset.identifier}/{ver.version}: Metadata corrected with '
+                f'`{correct_func.__name__}()`'
+            )
+    else:
+        click.echo(
+            f'No changes. No metadata of the Dandiset version(s) need to be corrected per '
+            f'`{correct_func.__name__}()`'
+        )
+
+
+def correct_affiliation_corruption(meta: dict) -> dict | None:
+    """
+    Correct corruptions in JSON objects with the `"schemaKey"` of `"Affiliation"`.
+
+    :param meta: The Dandiset metadata instance potentially containing the objects to be corrected.
+    :return: If there is correction to be made, return the corrected metadata; otherwise, return
+        `None`.
+
+    Note: This function corrects the corruptions described in
+        https://github.com/dandi/dandi-schema/issues/276
+    """

From 25e4cfae4dd0156c7683f8293e958ffaf3473e6d Mon Sep 17 00:00:00 2001
From: Isaac To <isaac.chun.to@gmail.com>
Date: Sun, 9 Feb 2025 22:23:25 -0800
Subject: [PATCH 2/9] feat: add solution to correct `Affiliation` corruption

Provide solution to correct the corruption
of `Affiliation` JSON objects documented in
https://github.com/dandi/dandi-schema/issues/276
---
 .../management/commands/correct_metadata.py   |  42 +++-
 .../api/tests/test_management/__init__.py     |   0
 .../test_management/test_commands/__init__.py |   0
 .../test_commands/test_correct_metadata.py    | 224 ++++++++++++++++++
 4 files changed, 265 insertions(+), 1 deletion(-)
 create mode 100644 dandiapi/api/tests/test_management/__init__.py
 create mode 100644 dandiapi/api/tests/test_management/test_commands/__init__.py
 create mode 100644 dandiapi/api/tests/test_management/test_commands/test_correct_metadata.py

diff --git a/dandiapi/api/management/commands/correct_metadata.py b/dandiapi/api/management/commands/correct_metadata.py
index 269828290..8104b7b01 100644
--- a/dandiapi/api/management/commands/correct_metadata.py
+++ b/dandiapi/api/management/commands/correct_metadata.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from copy import deepcopy
+from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
     from collections.abc import Callable
@@ -92,3 +93,42 @@ def correct_affiliation_corruption(meta: dict) -> dict | None:
     Note: This function corrects the corruptions described in
         https://github.com/dandi/dandi-schema/issues/276
     """
+    unwanted_fields = ['contactPoint', 'includeInCitation', 'roleName']
+
+    meta_corrected = deepcopy(meta)
+    affiliation_objs = find_objs(meta_corrected, 'Affiliation')
+
+    corrected = False
+    for obj in affiliation_objs:
+        for field in unwanted_fields:
+            if field in obj:
+                del obj[field]
+                corrected = True
+
+    return meta_corrected if corrected else None
+
+
+def find_objs(instance: Any, schema_key: str) -> list[dict]:
+    """
+    Find JSON objects with a specified `"schemaKey"` field within a data instance.
+
+    :param instance: The data instance to find JSON objects from
+    :param schema_key: The `"schemaKey"` field value
+    :return: The list of JSON objects with the specified `"schemaKey"` in the data instance
+    """
+
+    def find_objs_(data: Any) -> None:
+        if isinstance(data, dict):
+            if 'schemaKey' in data and data['schemaKey'] == schema_key:
+                objs.append(data)
+            for value in data.values():
+                find_objs_(value)
+        elif isinstance(data, list):
+            for item in data:
+                find_objs_(item)
+        else:
+            return
+
+    objs: list[dict] = []
+    find_objs_(instance)
+    return objs
diff --git a/dandiapi/api/tests/test_management/__init__.py b/dandiapi/api/tests/test_management/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/dandiapi/api/tests/test_management/test_commands/__init__.py b/dandiapi/api/tests/test_management/test_commands/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/dandiapi/api/tests/test_management/test_commands/test_correct_metadata.py b/dandiapi/api/tests/test_management/test_commands/test_correct_metadata.py
new file mode 100644
index 000000000..d4915d60c
--- /dev/null
+++ b/dandiapi/api/tests/test_management/test_commands/test_correct_metadata.py
@@ -0,0 +1,224 @@
+from __future__ import annotations
+
+from copy import deepcopy
+from typing import Any
+
+import pytest
+
+from dandiapi.api.management.commands.correct_metadata import (
+    correct_affiliation_corruption,
+    find_objs,
+)
+
+
+@pytest.mark.parametrize(
+    ('instance', 'schema_key', 'expected'),
+    [
+        # Single matching object.
+        pytest.param(
+            {'schemaKey': 'Test', 'data': 123},
+            'Test',
+            [{'schemaKey': 'Test', 'data': 123}],
+            id='single-match',
+        ),
+        # No match.
+        pytest.param(
+            {'schemaKey': 'NotMatch', 'data': 123},
+            'Test',
+            [],
+            id='no-match',
+        ),
+        # Empty dictionary should return an empty list.
+        pytest.param(
+            {},
+            'Test',
+            [],
+            id='empty-dict',
+        ),
+        # Empty list should return an empty list.
+        pytest.param(
+            [],
+            'Test',
+            [],
+            id='empty-list',
+        ),
+        # Nested dictionary: the matching object is nested within another dictionary.
+        pytest.param(
+            {'level1': {'schemaKey': 'Test', 'info': 'nested'}},
+            'Test',
+            [{'schemaKey': 'Test', 'info': 'nested'}],
+            id='nested-dict',
+        ),
+        # List of dictionaries: only those with matching schema key are returned.
+        pytest.param(
+            [
+                {'schemaKey': 'Test', 'data': 1},
+                {'schemaKey': 'Test', 'data': 2},
+                {'schemaKey': 'NotTest', 'data': 3},
+            ],
+            'Test',
+            [
+                {'schemaKey': 'Test', 'data': 1},
+                {'schemaKey': 'Test', 'data': 2},
+            ],
+            id='list-of-dicts',
+        ),
+        # Mixed structure: nested dictionaries and lists.
+        pytest.param(
+            {
+                'a': {'schemaKey': 'Test', 'value': 1},
+                'b': [
+                    {'schemaKey': 'NotTest', 'value': 2},
+                    {'schemaKey': 'Test', 'value': 3},
+                ],
+                'c': 'irrelevant',
+                'd': [{'e': {'schemaKey': 'Test', 'value': 4}}],
+            },
+            'Test',
+            [
+                {'schemaKey': 'Test', 'value': 1},
+                {'schemaKey': 'Test', 'value': 3},
+                {'schemaKey': 'Test', 'value': 4},
+            ],
+            id='mixed-structure',
+        ),
+        # Non-collection type: integer.
+        pytest.param(
+            42,
+            'Test',
+            [],
+            id='non-collection-int',
+        ),
+        # Non-collection type: string.
+        pytest.param(
+            'some string',
+            'Test',
+            [],
+            id='non-collection-string',
+        ),
+        # Non-collection type: float.
+        pytest.param(
+            3.14,
+            'Test',
+            [],
+            id='non-collection-float',
+        ),
+        # Non-collection type: None.
+        pytest.param(
+            None,
+            'Test',
+            [],
+            id='non-collection-None',
+        ),
+        # Nested child: an object with the schema key contains a nested child that also
+        # has the schema key.
+        pytest.param(
+            {'schemaKey': 'Test', 'child': {'schemaKey': 'Test', 'data': 'child'}},
+            'Test',
+            [
+                {'schemaKey': 'Test', 'child': {'schemaKey': 'Test', 'data': 'child'}},
+                {'schemaKey': 'Test', 'data': 'child'},
+            ],
+            id='nested-child',
+        ),
+        # List in field:
+        # The object with the given schema key has a field whose value is a list
+        # containing objects, some of which also have the given schema key.
+        pytest.param(
+            {
+                'schemaKey': 'Test',
+                'items': [
+                    {'schemaKey': 'Test', 'data': 'item1'},
+                    {'schemaKey': 'Other', 'data': 'item2'},
+                    {'schemaKey': 'Test', 'data': 'item3'},
+                ],
+            },
+            'Test',
+            [
+                # The outer object is returned first...
+                {
+                    'schemaKey': 'Test',
+                    'items': [
+                        {'schemaKey': 'Test', 'data': 'item1'},
+                        {'schemaKey': 'Other', 'data': 'item2'},
+                        {'schemaKey': 'Test', 'data': 'item3'},
+                    ],
+                },
+                # ...followed by the matching objects within the list.
+                {'schemaKey': 'Test', 'data': 'item1'},
+                {'schemaKey': 'Test', 'data': 'item3'},
+            ],
+            id='list-in-field',
+        ),
+    ],
+)
+def test_find_objs_parametrized(instance: Any, schema_key: str, expected: list[dict]) -> None:
+    result = find_objs(instance, schema_key)
+    assert result == expected
+
+
+@pytest.mark.parametrize(
+    ('input_meta', 'expected_output'),
+    [
+        # No Affiliation object: nothing to change.
+        (
+            {'key': 'value'},
+            None,
+        ),
+        # Affiliation exists but has no unwanted fields: returns None.
+        (
+            {'affiliation': {'schemaKey': 'Affiliation', 'name': 'Alice'}},
+            None,
+        ),
+        # Single unwanted field ("contactPoint") should be removed.
+        (
+            {'affiliation': {'schemaKey': 'Affiliation', 'name': 'Alice', 'contactPoint': 'info'}},
+            {'affiliation': {'schemaKey': 'Affiliation', 'name': 'Alice'}},
+        ),
+        # Multiple unwanted fields should all be removed.
+        (
+            {
+                'affiliation': {
+                    'schemaKey': 'Affiliation',
+                    'name': 'Test',
+                    'contactPoint': 'a',
+                    'includeInCitation': 'b',
+                    'roleName': 'c',
+                }
+            },
+            {'affiliation': {'schemaKey': 'Affiliation', 'name': 'Test'}},
+        ),
+        # Nested Affiliation objects should be corrected.
+        (
+            {
+                'users': [
+                    {'profile': {'schemaKey': 'Affiliation', 'name': 'Bob', 'roleName': 'Member'}},
+                    {'profile': {'schemaKey': 'Affiliation', 'name': 'Charlie'}},
+                ],
+                'data': {'schemaKey': 'NotAffiliation', 'contactPoint': 'should not be touched'},
+            },
+            {
+                'users': [
+                    {'profile': {'schemaKey': 'Affiliation', 'name': 'Bob'}},
+                    {'profile': {'schemaKey': 'Affiliation', 'name': 'Charlie'}},
+                ],
+                'data': {'schemaKey': 'NotAffiliation', 'contactPoint': 'should not be touched'},
+            },
+        ),
+    ],
+)
+def test_correct_affiliation_corruption(input_meta, expected_output):
+    """
+    Test `correct_affiliation_corruption()`.
+
+    Ensure that it returns the correct modified metadata (if any corrections are needed)
+    while not mutating the original input.
+    """
+    # Make a deep copy of the input to ensure immutability.
+    original_meta = deepcopy(input_meta)
+    result = correct_affiliation_corruption(input_meta)
+
+    assert result == expected_output
+
+    # Verify that the original metadata has not been mutated.
+    assert input_meta == original_meta, 'The input metadata should remain unchanged.'

From eba4c3be72d8c4d692b453dbadd4ecb460c263fa Mon Sep 17 00:00:00 2001
From: Jacob Nesbitt <jjnesbitt2@gmail.com>
Date: Fri, 28 Feb 2025 17:21:14 -0500
Subject: [PATCH 3/9] Move test_correct_metadata.py into tests/

---
 .../{test_management/test_commands => }/test_correct_metadata.py  | 0
 dandiapi/api/tests/test_management/__init__.py                    | 0
 dandiapi/api/tests/test_management/test_commands/__init__.py      | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename dandiapi/api/tests/{test_management/test_commands => }/test_correct_metadata.py (100%)
 delete mode 100644 dandiapi/api/tests/test_management/__init__.py
 delete mode 100644 dandiapi/api/tests/test_management/test_commands/__init__.py

diff --git a/dandiapi/api/tests/test_management/test_commands/test_correct_metadata.py b/dandiapi/api/tests/test_correct_metadata.py
similarity index 100%
rename from dandiapi/api/tests/test_management/test_commands/test_correct_metadata.py
rename to dandiapi/api/tests/test_correct_metadata.py
diff --git a/dandiapi/api/tests/test_management/__init__.py b/dandiapi/api/tests/test_management/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/dandiapi/api/tests/test_management/test_commands/__init__.py b/dandiapi/api/tests/test_management/test_commands/__init__.py
deleted file mode 100644
index e69de29bb..000000000

From a21f6d00ce101489170c20c25047c2a8763ae965 Mon Sep 17 00:00:00 2001
From: Isaac To <isaac.chun.to@gmail.com>
Date: Fri, 28 Feb 2025 15:07:12 -0800
Subject: [PATCH 4/9] feat: change call interface of `correct_metadata` cmd

This make the default behavior to require
user to specify a particular dandiset version
to apply the correct to. Only when the `--all`
flag is provided, should the command apply the
correction to all dandiset versions
---
 .../management/commands/correct_metadata.py   | 42 ++++++++++---------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/dandiapi/api/management/commands/correct_metadata.py b/dandiapi/api/management/commands/correct_metadata.py
index 8104b7b01..9f4f68140 100644
--- a/dandiapi/api/management/commands/correct_metadata.py
+++ b/dandiapi/api/management/commands/correct_metadata.py
@@ -14,33 +14,37 @@
 
 
 @click.command(
-    help='Correct corrupted metadata. If `--dandiset` and `--dandiset-version` are provided, '
-    'apply the correction to the specified Dandiset version. Otherwise, apply the correction to '
-    'all Dandiset versions.'
+    help='Correct corrupted metadata. If `--all` is provided, apply the correction to '
+    'all Dandiset versions. Otherwise, provide the Dandiset and Dandiset version to '
+    'apply the correction to.'
 )
+@click.argument('dandiset', required=False)
+@click.argument('dandiset_version', required=False)
 @click.option(
-    '--dandiset',
-    'dandiset',
-    help='The Dandiset. (This must be provided if `--dandiset-version` is provided.)',
+    '--all',
+    'apply_to_all',
+    is_flag=True,
+    default=False,
+    help='Apply the correction to all Dandiset versions '
+    '(cannot be combined with dandiset arguments).',
 )
-@click.option(
-    '--dandiset-version',
-    'dandiset_version',
-    help='The version of the Dandiset. (This must be provided if `--dandiset` is provided.)',
-)
-def correct_metadata(*, dandiset: str | None, dandiset_version: str | None):
-    # Func to use to correct the metadata (update as needed)
-    correct_func: Callable[[dict], dict | None] = correct_affiliation_corruption
-
-    # Ensure both options are provided together (or both omitted)
-    if (dandiset is None) != (dandiset_version is None):
+def correct_metadata(*, dandiset: str | None, dandiset_version: str | None, apply_to_all: bool):
+    if apply_to_all:
+        if dandiset is not None or dandiset_version is not None:
+            raise click.UsageError(
+                'Cannot specify `--all` together with `dandiset` or `dandiset_version` arguments.'
+            )
+    elif dandiset is None or dandiset_version is None:
         raise click.UsageError(
-            'Both `--dandiset` and `--dandiset-version` must be provided together.'
+            'Either `--all` or two arguments (dandiset, dandiset_version) must be provided.'
         )
 
+    # Func to use to correct the metadata (update as needed)
+    correct_func: Callable[[dict], dict | None] = correct_affiliation_corruption
+
     vers = (
         Version.objects.all()
-        if dandiset is None
+        if apply_to_all
         else (Version.objects.get(dandiset=dandiset, version=dandiset_version),)
     )
 

From 256a6dc893e136c82d1c6fe829206efb7a3cf4bc Mon Sep 17 00:00:00 2001
From: Jacob Nesbitt <jjnesbitt2@gmail.com>
Date: Mon, 3 Mar 2025 12:01:21 -0500
Subject: [PATCH 5/9] Add --check flag

---
 dandiapi/api/management/commands/correct_metadata.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/dandiapi/api/management/commands/correct_metadata.py b/dandiapi/api/management/commands/correct_metadata.py
index 9f4f68140..0e44128bc 100644
--- a/dandiapi/api/management/commands/correct_metadata.py
+++ b/dandiapi/api/management/commands/correct_metadata.py
@@ -28,7 +28,14 @@
     help='Apply the correction to all Dandiset versions '
     '(cannot be combined with dandiset arguments).',
 )
-def correct_metadata(*, dandiset: str | None, dandiset_version: str | None, apply_to_all: bool):
+@click.option(
+    '--check',
+    is_flag=True,
+    help="Don't perform any changes, just check for corrupted metadata.",
+)
+def correct_metadata(
+    *, dandiset: str | None, dandiset_version: str | None, apply_to_all: bool, check: bool
+):
     if apply_to_all:
         if dandiset is not None or dandiset_version is not None:
             raise click.UsageError(

From a89592196098f3534058ff12a9056b0fb6e6ef75 Mon Sep 17 00:00:00 2001
From: Jacob Nesbitt <jjnesbitt2@gmail.com>
Date: Mon, 3 Mar 2025 12:41:49 -0500
Subject: [PATCH 6/9] Simplify correct_metadata and helper functions

- Don't allow correct function configuration
- Remove verbose error handling logic
- Write manifest files synchronously
- Only support correction of the `Affiliation` schema key
- Remove `find_objs` tests against generalized schema key
- Use transactions to couple logic of metadata save and manifest file generation
---
 .../management/commands/correct_metadata.py   | 134 +++++++--------
 dandiapi/api/tests/test_correct_metadata.py   | 152 +-----------------
 2 files changed, 55 insertions(+), 231 deletions(-)

diff --git a/dandiapi/api/management/commands/correct_metadata.py b/dandiapi/api/management/commands/correct_metadata.py
index 0e44128bc..f437075be 100644
--- a/dandiapi/api/management/commands/correct_metadata.py
+++ b/dandiapi/api/management/commands/correct_metadata.py
@@ -1,16 +1,15 @@
 from __future__ import annotations
 
 from copy import deepcopy
-from typing import TYPE_CHECKING, Any
-
-if TYPE_CHECKING:
-    from collections.abc import Callable
+import sys
+import typing
+from typing import Any
 
 from django.db import transaction
 import djclick as click
 
+from dandiapi.api.manifests import write_dandiset_jsonld, write_dandiset_yaml
 from dandiapi.api.models import Version
-from dandiapi.api.tasks import write_manifest_files
 
 
 @click.command(
@@ -33,7 +32,7 @@
     is_flag=True,
     help="Don't perform any changes, just check for corrupted metadata.",
 )
-def correct_metadata(
+def correct_metadata(  # noqa: C901
     *, dandiset: str | None, dandiset_version: str | None, apply_to_all: bool, check: bool
 ):
     if apply_to_all:
@@ -46,51 +45,46 @@ def correct_metadata(
             'Either `--all` or two arguments (dandiset, dandiset_version) must be provided.'
         )
 
-    # Func to use to correct the metadata (update as needed)
-    correct_func: Callable[[dict], dict | None] = correct_affiliation_corruption
+    # Get version queryset
+    vers = Version.objects.all()
+    if not apply_to_all:
+        dandiset = typing.cast(str, dandiset)
+        vers = vers.filter(dandiset=int(dandiset), version=dandiset_version)
 
-    vers = (
-        Version.objects.all()
-        if apply_to_all
-        else (Version.objects.get(dandiset=dandiset, version=dandiset_version),)
-    )
+    if not vers.exists():
+        click.echo('No matching versions found')
+        return
 
-    # List of changes to be applied to the database represented as tuples of the form
-    # version to be changed and the new metadata
-    changes: list[tuple[Version, dict]] = []
+    # For each version, find and fix metadata corruptions, along with saving out manifest files
     for ver in vers:
-        try:
-            meta_corrected = correct_func(ver.metadata)
-        except Exception as e:
-            click.echo(
-                f'{ver.dandiset.identifier}/{ver.version}: Failed to correct metadata with '
-                f'`{correct_func.__name__}()`'
-            )
-            click.echo(e)
-            raise click.Abort from e
+        new_meta = correct_affiliation_corruption(ver.metadata)
+        if new_meta is None:
+            continue
 
-        if meta_corrected is not None:
-            changes.append((ver, meta_corrected))
+        click.echo(f'Found corruption in {ver}')
+        if check:
+            continue
 
-    if changes:
-        # Apply the changes to the database atomically
+        # Save each version in a separate transaction to avoid de-sync with dandiset yaml/jsonld
         with transaction.atomic():
-            for ver, meta_corrected in changes:
-                ver.metadata = meta_corrected
-                ver.save()  # TODO: should I use `save()` in this case? It has some custom logic.
+            write_dandiset_yaml(ver)
+            write_dandiset_jsonld(ver)
+            click.echo(f'\tWrote dandiset yaml and json for version {ver}')
 
-        for ver, _ in changes:
-            write_manifest_files.delay(ver.id)  # TODO: Please check if this is needed
+            ver.metadata = new_meta
+            ver.save()
 
-            click.echo(
-                f'{ver.dandiset.identifier}/{ver.version}: Metadata corrected with '
-                f'`{correct_func.__name__}()`'
-            )
-    else:
+    # Remaining check is not needed since no data was modified
+    if check:
+        return
+
+    # If we find any un-fixed instances, raise exception
+    remaining = [ver for ver in vers if correct_affiliation_corruption(ver.metadata) is not None]
+    if remaining:
         click.echo(
-            f'No changes. No metadata of the Dandiset version(s) need to be corrected per '
-            f'`{correct_func.__name__}()`'
+            click.style(f'\nFound remaining corrupted versions: {remaining}', fg='red', bold=True)
         )
+        sys.exit(1)
 
 
 def correct_affiliation_corruption(meta: dict) -> dict | None:
@@ -104,42 +98,22 @@ def correct_affiliation_corruption(meta: dict) -> dict | None:
     Note: This function corrects the corruptions described in
         https://github.com/dandi/dandi-schema/issues/276
     """
-    unwanted_fields = ['contactPoint', 'includeInCitation', 'roleName']
-
-    meta_corrected = deepcopy(meta)
-    affiliation_objs = find_objs(meta_corrected, 'Affiliation')
-
-    corrected = False
-    for obj in affiliation_objs:
-        for field in unwanted_fields:
-            if field in obj:
-                del obj[field]
-                corrected = True
-
-    return meta_corrected if corrected else None
-
-
-def find_objs(instance: Any, schema_key: str) -> list[dict]:
-    """
-    Find JSON objects with a specified `"schemaKey"` field within a data instance.
-
-    :param instance: The data instance to find JSON objects from
-    :param schema_key: The `"schemaKey"` field value
-    :return: The list of JSON objects with the specified `"schemaKey"` in the data instance
-    """
-
-    def find_objs_(data: Any) -> None:
-        if isinstance(data, dict):
-            if 'schemaKey' in data and data['schemaKey'] == schema_key:
-                objs.append(data)
-            for value in data.values():
-                find_objs_(value)
-        elif isinstance(data, list):
-            for item in data:
-                find_objs_(item)
-        else:
-            return
-
-    objs: list[dict] = []
-    find_objs_(instance)
-    return objs
+    new_meta = deepcopy(meta)
+    correct_objs(new_meta)
+
+    return new_meta if new_meta != meta else None
+
+
+def correct_objs(data: Any) -> None:
+    if isinstance(data, dict):
+        if 'schemaKey' in data and data['schemaKey'] == 'Affiliation':
+            data.pop('contactPoint', None)
+            data.pop('includeInCitation', None)
+            data.pop('roleName', None)
+        for value in data.values():
+            correct_objs(value)
+    elif isinstance(data, list):
+        for item in data:
+            correct_objs(item)
+    else:
+        return
diff --git a/dandiapi/api/tests/test_correct_metadata.py b/dandiapi/api/tests/test_correct_metadata.py
index d4915d60c..6d9b734df 100644
--- a/dandiapi/api/tests/test_correct_metadata.py
+++ b/dandiapi/api/tests/test_correct_metadata.py
@@ -1,160 +1,10 @@
 from __future__ import annotations
 
 from copy import deepcopy
-from typing import Any
 
 import pytest
 
-from dandiapi.api.management.commands.correct_metadata import (
-    correct_affiliation_corruption,
-    find_objs,
-)
-
-
-@pytest.mark.parametrize(
-    ('instance', 'schema_key', 'expected'),
-    [
-        # Single matching object.
-        pytest.param(
-            {'schemaKey': 'Test', 'data': 123},
-            'Test',
-            [{'schemaKey': 'Test', 'data': 123}],
-            id='single-match',
-        ),
-        # No match.
-        pytest.param(
-            {'schemaKey': 'NotMatch', 'data': 123},
-            'Test',
-            [],
-            id='no-match',
-        ),
-        # Empty dictionary should return an empty list.
-        pytest.param(
-            {},
-            'Test',
-            [],
-            id='empty-dict',
-        ),
-        # Empty list should return an empty list.
-        pytest.param(
-            [],
-            'Test',
-            [],
-            id='empty-list',
-        ),
-        # Nested dictionary: the matching object is nested within another dictionary.
-        pytest.param(
-            {'level1': {'schemaKey': 'Test', 'info': 'nested'}},
-            'Test',
-            [{'schemaKey': 'Test', 'info': 'nested'}],
-            id='nested-dict',
-        ),
-        # List of dictionaries: only those with matching schema key are returned.
-        pytest.param(
-            [
-                {'schemaKey': 'Test', 'data': 1},
-                {'schemaKey': 'Test', 'data': 2},
-                {'schemaKey': 'NotTest', 'data': 3},
-            ],
-            'Test',
-            [
-                {'schemaKey': 'Test', 'data': 1},
-                {'schemaKey': 'Test', 'data': 2},
-            ],
-            id='list-of-dicts',
-        ),
-        # Mixed structure: nested dictionaries and lists.
-        pytest.param(
-            {
-                'a': {'schemaKey': 'Test', 'value': 1},
-                'b': [
-                    {'schemaKey': 'NotTest', 'value': 2},
-                    {'schemaKey': 'Test', 'value': 3},
-                ],
-                'c': 'irrelevant',
-                'd': [{'e': {'schemaKey': 'Test', 'value': 4}}],
-            },
-            'Test',
-            [
-                {'schemaKey': 'Test', 'value': 1},
-                {'schemaKey': 'Test', 'value': 3},
-                {'schemaKey': 'Test', 'value': 4},
-            ],
-            id='mixed-structure',
-        ),
-        # Non-collection type: integer.
-        pytest.param(
-            42,
-            'Test',
-            [],
-            id='non-collection-int',
-        ),
-        # Non-collection type: string.
-        pytest.param(
-            'some string',
-            'Test',
-            [],
-            id='non-collection-string',
-        ),
-        # Non-collection type: float.
-        pytest.param(
-            3.14,
-            'Test',
-            [],
-            id='non-collection-float',
-        ),
-        # Non-collection type: None.
-        pytest.param(
-            None,
-            'Test',
-            [],
-            id='non-collection-None',
-        ),
-        # Nested child: an object with the schema key contains a nested child that also
-        # has the schema key.
-        pytest.param(
-            {'schemaKey': 'Test', 'child': {'schemaKey': 'Test', 'data': 'child'}},
-            'Test',
-            [
-                {'schemaKey': 'Test', 'child': {'schemaKey': 'Test', 'data': 'child'}},
-                {'schemaKey': 'Test', 'data': 'child'},
-            ],
-            id='nested-child',
-        ),
-        # List in field:
-        # The object with the given schema key has a field whose value is a list
-        # containing objects, some of which also have the given schema key.
-        pytest.param(
-            {
-                'schemaKey': 'Test',
-                'items': [
-                    {'schemaKey': 'Test', 'data': 'item1'},
-                    {'schemaKey': 'Other', 'data': 'item2'},
-                    {'schemaKey': 'Test', 'data': 'item3'},
-                ],
-            },
-            'Test',
-            [
-                # The outer object is returned first...
-                {
-                    'schemaKey': 'Test',
-                    'items': [
-                        {'schemaKey': 'Test', 'data': 'item1'},
-                        {'schemaKey': 'Other', 'data': 'item2'},
-                        {'schemaKey': 'Test', 'data': 'item3'},
-                    ],
-                },
-                # ...followed by the matching objects within the list.
-                {'schemaKey': 'Test', 'data': 'item1'},
-                {'schemaKey': 'Test', 'data': 'item3'},
-            ],
-            id='list-in-field',
-        ),
-    ],
-)
-def test_find_objs_parametrized(instance: Any, schema_key: str, expected: list[dict]) -> None:
-    result = find_objs(instance, schema_key)
-    assert result == expected
+from dandiapi.api.management.commands.correct_metadata import correct_affiliation_corruption
 
 
 @pytest.mark.parametrize(

From b02e2ff9661732527745aa5c436c9777b58e25ab Mon Sep 17 00:00:00 2001
From: Jacob Nesbitt <jjnesbitt2@gmail.com>
Date: Wed, 12 Mar 2025 18:45:24 -0400
Subject: [PATCH 7/9] Only apply correction to draft versions

---
 .../management/commands/correct_metadata.py   | 21 +++++++------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/dandiapi/api/management/commands/correct_metadata.py b/dandiapi/api/management/commands/correct_metadata.py
index f437075be..e24c1afde 100644
--- a/dandiapi/api/management/commands/correct_metadata.py
+++ b/dandiapi/api/management/commands/correct_metadata.py
@@ -14,18 +14,17 @@
 
 @click.command(
     help='Correct corrupted metadata. If `--all` is provided, apply the correction to '
-    'all Dandiset versions. Otherwise, provide the Dandiset and Dandiset version to '
+    'all Dandiset versions. Otherwise, provide the Dandiset to '
     'apply the correction to.'
 )
 @click.argument('dandiset', required=False)
-@click.argument('dandiset_version', required=False)
 @click.option(
     '--all',
     'apply_to_all',
     is_flag=True,
     default=False,
     help='Apply the correction to all Dandiset versions '
-    '(cannot be combined with dandiset arguments).',
+    '(cannot be combined with dandiset argument).',
 )
 @click.option(
     '--check',
@@ -33,23 +32,19 @@
     help="Don't perform any changes, just check for corrupted metadata.",
 )
 def correct_metadata(  # noqa: C901
-    *, dandiset: str | None, dandiset_version: str | None, apply_to_all: bool, check: bool
+    *, dandiset: str | None, apply_to_all: bool, check: bool
 ):
     if apply_to_all:
-        if dandiset is not None or dandiset_version is not None:
-            raise click.UsageError(
-                'Cannot specify `--all` together with `dandiset` or `dandiset_version` arguments.'
-            )
-    elif dandiset is None or dandiset_version is None:
-        raise click.UsageError(
-            'Either `--all` or two arguments (dandiset, dandiset_version) must be provided.'
-        )
+        if dandiset is not None:
+            raise click.UsageError('Cannot specify `--all` together with `dandiset` argument.')
+    elif dandiset is None:
+        raise click.UsageError('Either `--all` or `dandiset` argument must be provided.')
 
     # Get version queryset
     vers = Version.objects.all()
     if not apply_to_all:
         dandiset = typing.cast(str, dandiset)
-        vers = vers.filter(dandiset=int(dandiset), version=dandiset_version)
+        vers = vers.filter(dandiset=int(dandiset), version='draft')
 
     if not vers.exists():
         click.echo('No matching versions found')

From c2c14f53be7093b557d11047cc36aad1a0c2b93a Mon Sep 17 00:00:00 2001
From: Jacob Nesbitt <jjnesbitt2@gmail.com>
Date: Thu, 27 Mar 2025 17:13:25 -0400
Subject: [PATCH 8/9] Add audit event to correct_metadata command

---
 dandiapi/api/management/commands/correct_metadata.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/dandiapi/api/management/commands/correct_metadata.py b/dandiapi/api/management/commands/correct_metadata.py
index e24c1afde..8d0a8de6e 100644
--- a/dandiapi/api/management/commands/correct_metadata.py
+++ b/dandiapi/api/management/commands/correct_metadata.py
@@ -10,6 +10,7 @@
 
 from dandiapi.api.manifests import write_dandiset_jsonld, write_dandiset_yaml
 from dandiapi.api.models import Version
+from dandiapi.api.services import audit
 
 
 @click.command(
@@ -69,6 +70,14 @@ def correct_metadata(  # noqa: C901
             ver.metadata = new_meta
             ver.save()
 
+            audit.update_metadata(
+                dandiset=ver.dandiset,
+                metadata=new_meta,
+                user=None,
+                admin=True,
+                description='Apply metadata correction from https://github.com/dandi/dandi-schema/issues/276',
+            )
+
     # Remaining check is not needed since no data was modified
     if check:
         return

From 3ee22a7ef036690c521bba225fd6fe8637f7bfea Mon Sep 17 00:00:00 2001
From: Jacob Nesbitt <jjnesbitt2@gmail.com>
Date: Fri, 28 Mar 2025 11:19:01 -0400
Subject: [PATCH 9/9] Use queryset iterators to reduce memory usage

Co-Authored-By: Mike VanDenburgh <michael.vandenburgh@kitware.com>
---
 dandiapi/api/management/commands/correct_metadata.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/dandiapi/api/management/commands/correct_metadata.py b/dandiapi/api/management/commands/correct_metadata.py
index 8d0a8de6e..6a03ec974 100644
--- a/dandiapi/api/management/commands/correct_metadata.py
+++ b/dandiapi/api/management/commands/correct_metadata.py
@@ -52,7 +52,7 @@ def correct_metadata(  # noqa: C901
         return
 
     # For each version, find and fix metadata corruptions, along with saving out manifest files
-    for ver in vers:
+    for ver in vers.iterator():
         new_meta = correct_affiliation_corruption(ver.metadata)
         if new_meta is None:
             continue
@@ -83,7 +83,9 @@ def correct_metadata(  # noqa: C901
         return
 
     # If we find any un-fixed instances, raise exception
-    remaining = [ver for ver in vers if correct_affiliation_corruption(ver.metadata) is not None]
+    remaining = [
+        ver for ver in vers.iterator() if correct_affiliation_corruption(ver.metadata) is not None
+    ]
     if remaining:
         click.echo(
             click.style(f'\nFound remaining corrupted versions: {remaining}', fg='red', bold=True)