From 9c9b8f9c87f5d8dced0e45932344b28989a1b37d Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Thu, 21 Mar 2024 22:43:08 +0100 Subject: [PATCH 01/30] typos --- avalanche/benchmarks/datasets/dataset_utils.py | 2 +- avalanche/benchmarks/utils/data.py | 10 ++++------ avalanche/benchmarks/utils/flat_data.py | 4 ++-- .../from-zero-to-hero-tutorial/03_benchmarks.md | 14 +++++++------- .../09_contribute-to-avalanche.md | 12 ++++++------ 5 files changed, 20 insertions(+), 22 deletions(-) diff --git a/avalanche/benchmarks/datasets/dataset_utils.py b/avalanche/benchmarks/datasets/dataset_utils.py index 575093176..e26c0af7e 100644 --- a/avalanche/benchmarks/datasets/dataset_utils.py +++ b/avalanche/benchmarks/datasets/dataset_utils.py @@ -48,7 +48,7 @@ def load_config_file(): def maybe_init_config_file(): - """Initialize Avalanche user's config file, if it does not exists yet. + """Initialize Avalanche user's config file, if it does not exist yet. The file is located in `~/.avalanche/config.json` """ diff --git a/avalanche/benchmarks/utils/data.py b/avalanche/benchmarks/utils/data.py index 9985e19fa..8dfb3ccba 100644 --- a/avalanche/benchmarks/utils/data.py +++ b/avalanche/benchmarks/utils/data.py @@ -50,7 +50,7 @@ class AvalancheDataset(IDataset[T_co]): """Avalanche Dataset. - Avlanche dataset are pytorch-compatible Datasets with some additional + Avalanche dataset are pytorch-compatible Datasets with some additional functionality such as: - management of transformation groups via :class:`AvalancheTransform` - support for sample attributes such as class targets and task labels @@ -79,9 +79,9 @@ class AvalancheDataset(IDataset[T_co]): switching to a different transformation group by calling the ``train()``, ``eval()`` or ``with_transforms` methods always returns a new dataset, - levaing the original one unchanged. + leaving the original one unchanged. - Ttransformation groups can be manipulated by removing, freezing, or + Transformation groups can be manipulated by removing, freezing, or replacing transformations. Each operation returns a new dataset, leaving the original one unchanged. """ @@ -103,9 +103,7 @@ def __init__( applied by this dataset. :param transform_groups: Avalanche transform groups. """ - if issubclass(type(datasets), TorchDataset) or issubclass( - type(datasets), AvalancheDataset - ): + if isinstance(datasets, (TorchDataset, AvalancheDataset)): datasets = [datasets] # type: ignore # NOTES on implementation: diff --git a/avalanche/benchmarks/utils/flat_data.py b/avalanche/benchmarks/utils/flat_data.py index 02d3681cd..0d6542f90 100644 --- a/avalanche/benchmarks/utils/flat_data.py +++ b/avalanche/benchmarks/utils/flat_data.py @@ -186,10 +186,10 @@ class FlatData(IDataset[T_co], Sequence[T_co]): """FlatData is a dataset optimized for efficient repeated concatenation and subset operations. - The class combines concatentation and subsampling operations in a single + The class combines concatenation and subsampling operations in a single class. - Class for internal use only. Users shuold use `AvalancheDataset` for data + Class for internal use only. Users should use `AvalancheDataset` for data or `DataAttribute` for attributes such as class and task labels. 
*Notes for subclassing* diff --git a/docs/gitbook/from-zero-to-hero-tutorial/03_benchmarks.md b/docs/gitbook/from-zero-to-hero-tutorial/03_benchmarks.md index fd0e674d0..f9c5f41bd 100644 --- a/docs/gitbook/from-zero-to-hero-tutorial/03_benchmarks.md +++ b/docs/gitbook/from-zero-to-hero-tutorial/03_benchmarks.md @@ -43,10 +43,10 @@ from avalanche.benchmarks.utils import as_classification_dataset, AvalancheDatas # Most datasets in Avalanche are automatically downloaded the first time you use them # and stored in a default location. You can change this folder by calling -# avalanche.benchmarks.utils.set_dataset_root(new_location) +# avalanche.benchmarks.datasets.dataset_utils.set_dataset_root(new_location) datadir = default_dataset_location('mnist') -# As we would simply do with any Pytorch dataset we can create the train and +# As we would simply do with any Pytorch dataset we can create the train and # test sets from it. We could use any of the above imported Datasets, but let's # just try to use the standard MNIST. train_MNIST = MNIST(datadir, train=True, download=True) @@ -65,14 +65,14 @@ eval_transforms = torchvision.transforms.Compose([ train_MNIST = as_classification_dataset( train_MNIST, transform_groups={ - 'train': train_transforms, + 'train': train_transforms, 'eval': eval_transforms } ) test_MNIST = as_classification_dataset( test_MNIST, transform_groups={ - 'train': train_transforms, + 'train': train_transforms, 'eval': eval_transforms } ) @@ -117,7 +117,7 @@ print(list(dsub.targets)) ## 🏛️ Classic Benchmarks Most benchmarks will provide two streams: the `train_stream` and `test_stream`. -Often, these are two parallel streams of the same length, where each experience is sampled from the same distribution (e.g. same set of classes). +Often, these are two parallel streams of the same length, where each experience is sampled from the same distribution (e.g. same set of classes). Some benchmarks may have a single test experience with the whole test dataset. Experiences provide all the information needed to update the model, such as the new batch of data, and they may be decorated with attributes that are helpful for training or logging purposes. @@ -248,7 +248,7 @@ print(f"Experience {exp.logging().current_experience}") #### Classification -classification benchmarks follow the `ClassesTimeline` protocol and provide attributes about the classes in the stream. +classification benchmarks follow the `ClassesTimeline` protocol and provide attributes about the classes in the stream. ```python @@ -322,7 +322,7 @@ for exp in online_train_stream: print(f"\tsize: {len(exp.dataset)}") # in a training loop, here you would train on the online_train_stream - # here you would test on bm.valid_stream or bm.test_stream + # here you would test on bm.valid_stream or bm.test_stream ``` This completes the "_Benchmark_" tutorial for the "_From Zero to Hero_" series. We hope you enjoyed it! 
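Aside: the transform-groups API exercised in the benchmarks tutorial above keeps one dataset with several named preprocessing pipelines (`'train'`, `'eval'`) and switches between them functionally. The toy sketch below illustrates just that contract; it is a simplified stand-in, and `GroupedDataset` and its methods are illustrative names, not Avalanche's actual implementation.

```python
# Toy model of the "transform groups" contract from the tutorial above:
# one dataset, named pipelines, and functional switching that returns a
# new view while leaving the original unchanged. Illustrative only.
from dataclasses import dataclass, replace
from typing import Callable, Dict, Optional


@dataclass(frozen=True)
class GroupedDataset:
    data: list
    groups: Dict[str, Optional[Callable]]
    current: str = "train"

    def with_transforms(self, group: str) -> "GroupedDataset":
        assert group in self.groups, f"unknown group {group!r}"
        return replace(self, current=group)

    def train(self) -> "GroupedDataset":
        return self.with_transforms("train")

    def eval(self) -> "GroupedDataset":
        return self.with_transforms("eval")

    def __getitem__(self, i):
        transform = self.groups[self.current]
        return transform(self.data[i]) if transform else self.data[i]


ds = GroupedDataset([1, 2, 3], {"train": lambda x: -x, "eval": None})
assert ds[0] == -1         # the 'train' pipeline applies augmentation
assert ds.eval()[0] == 1   # switching returns a new view...
assert ds[0] == -1         # ...and the original dataset is untouched
```

The same immutability shows up in the real API: as the `AvalancheDataset` docstring in the `data.py` hunk above notes, `train()`, `eval()` and `with_transforms()` always return a new dataset and leave the original unchanged.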
diff --git a/docs/gitbook/from-zero-to-hero-tutorial/09_contribute-to-avalanche.md b/docs/gitbook/from-zero-to-hero-tutorial/09_contribute-to-avalanche.md index 9885b49f5..7edbaaecf 100644 --- a/docs/gitbook/from-zero-to-hero-tutorial/09_contribute-to-avalanche.md +++ b/docs/gitbook/from-zero-to-hero-tutorial/09_contribute-to-avalanche.md @@ -1,4 +1,4 @@ -]--- +--- description: How to Contribute Back to the Avalanche Community --- @@ -62,14 +62,14 @@ USE_GPU=False FAST_TEST=True python -m unittest discover tests -v **Contribute to the Avalanche documentation** -Apart from the code, you can also contribute to the Avalanche documentation 📚! We use [Jupyter notebooks](https://jupyter.org/) to write the documentation, so both code and text can be smoothly inserted, and, as you may have noticed, all our documentation can be run on [Google Colab](https://colab.research.google.com/notebooks/intro.ipynb)! +Apart from the code, you can also contribute to the Avalanche documentation 📚! We use [Jupyter notebooks](https://jupyter.org/) to write the documentation, so both code and text can be smoothly inserted, and, as you may have noticed, all our documentation can be run on [Google Colab](https://colab.research.google.com/notebooks/intro.ipynb)! To contribute to the documentation you need to follow the steps below: -1. The notebooks are contained in the folder `notebooks`. The folder structure is specular to the documentation, so do not create or delete any folder. -2. Detect the notebook that you want to edit and do all the modifications 📝 -3. Commit the changes and open a pull request (PR). -4. If your pull request will be accepted, your edited notebooks will be automatically converted and uploaded to the official Avalanche website 🎊! +1. The notebooks are contained in the folder `notebooks`. The folder structure is specular to the documentation, so do not create or delete any folder. +2. Detect the notebook that you want to edit and do all the modifications 📝 +3. Commit the changes and open a pull request (PR). +4. If your pull request will be accepted, your edited notebooks will be automatically converted and uploaded to the official Avalanche website 🎊! 
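Aside: the `data.py` hunk in the patch above collapses two `issubclass(type(x), ...)` checks into a single `isinstance(x, (A, B))` call. For ordinary classes the two spellings are equivalent (both match subclass instances); `isinstance` is simply the idiomatic form and accepts a tuple of types. A minimal standalone sketch of the pattern, with illustrative class names rather than the real PyTorch/Avalanche ones:

```python
# Sketch of the isinstance() refactor from the data.py hunk above.
# TorchLikeDataset / AvalancheLikeDataset are illustrative stand-ins.
class TorchLikeDataset:
    pass


class AvalancheLikeDataset:
    def __init__(self, datasets):
        # Before: issubclass(type(datasets), TorchLikeDataset) or
        #         issubclass(type(datasets), AvalancheLikeDataset)
        # After: isinstance() takes a tuple of types and already matches
        # subclass instances, so both checks collapse into one call.
        if isinstance(datasets, (TorchLikeDataset, AvalancheLikeDataset)):
            datasets = [datasets]  # normalize a lone dataset into a list
        self.datasets = list(datasets)


single = TorchLikeDataset()
assert len(AvalancheLikeDataset(single).datasets) == 1            # wrapped
assert len(AvalancheLikeDataset([single, single]).datasets) == 2  # as-is
```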
From e2a3bd3523965e60c07e182af8eafd58e77ebebb Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Mon, 25 Mar 2024 20:00:06 +0100 Subject: [PATCH 02/30] WIP: refactoring (_init_transform_groups -> TransformGroups.create) --- avalanche/benchmarks/scenarios/__init__.py | 1 + .../deprecated/generic_benchmark_creation.py | 6 +- .../utils/classification_dataset.py | 54 +++++------ avalanche/benchmarks/utils/data.py | 2 +- .../benchmarks/utils/dataset_definitions.py | 2 +- .../benchmarks/utils/detection_dataset.py | 38 ++++---- .../benchmarks/utils/transform_groups.py | 76 +++++++++++++++- avalanche/benchmarks/utils/utils.py | 89 ++----------------- 8 files changed, 136 insertions(+), 132 deletions(-) diff --git a/avalanche/benchmarks/scenarios/__init__.py b/avalanche/benchmarks/scenarios/__init__.py index a3ecad792..1e72800bd 100644 --- a/avalanche/benchmarks/scenarios/__init__.py +++ b/avalanche/benchmarks/scenarios/__init__.py @@ -1,6 +1,7 @@ from .generic_scenario import * from .deprecated.dataset_scenario import * from .deprecated.classification_scenario import * +from .deprecated.generic_benchmark_creation import * from .deprecated.new_classes import * from .deprecated.new_instances import * diff --git a/avalanche/benchmarks/scenarios/deprecated/generic_benchmark_creation.py b/avalanche/benchmarks/scenarios/deprecated/generic_benchmark_creation.py index 351d7b6f3..1aed1bae3 100644 --- a/avalanche/benchmarks/scenarios/deprecated/generic_benchmark_creation.py +++ b/avalanche/benchmarks/scenarios/deprecated/generic_benchmark_creation.py @@ -31,7 +31,7 @@ from avalanche.benchmarks.utils.classification_dataset import ( _make_taskaware_tensor_classification_dataset, - _make_taskaware_classification_dataset, + _make_taskaware_classification_dataset, ClassificationDataset, ) from avalanche.benchmarks.utils import ( @@ -197,7 +197,7 @@ class LazyStreamDefinition(NamedTuple): This class is a named tuple containing the fields required for defining a lazily-created benchmark. - - exps_generator: The experiences generator. Can be a "yield"-based + - exps_generator: The experience's generator. Can be a "yield"-based generator, a custom sequence, a standard list or any kind of iterable returning :class:`AvalancheDataset`. - stream_length: The number of experiences in the stream. Must match the @@ -207,7 +207,7 @@ class LazyStreamDefinition(NamedTuple): can be used. """ - exps_generator: Iterable[TaskAwareClassificationDataset] + exps_generator: Iterable[ClassificationDataset] """ The experiences generator. 
Can be a "yield"-based generator, a custom sequence, a standard list or any kind of iterable returning diff --git a/avalanche/benchmarks/utils/classification_dataset.py b/avalanche/benchmarks/utils/classification_dataset.py index ad6db47f6..848f15f36 100644 --- a/avalanche/benchmarks/utils/classification_dataset.py +++ b/avalanche/benchmarks/utils/classification_dataset.py @@ -26,7 +26,6 @@ _count_unique, find_common_transforms_group, _init_task_labels, - _init_transform_groups, _split_user_def_targets, _split_user_def_task_label, _traverse_supported_dataset, @@ -34,6 +33,7 @@ from avalanche.benchmarks.utils.data import AvalancheDataset from avalanche.benchmarks.utils.transform_groups import ( + TransformGroups, TransformGroupDef, DefaultTransformGroups, XTransform, @@ -117,7 +117,7 @@ def task_pattern_indices(self) -> Dict[int, Sequence[int]]: @property def task_set(self: TClassificationDataset) -> TaskSet[TClassificationDataset]: - """Returns the datasets's ``TaskSet``, which is a mapping .""" return TaskSet(self) @@ -296,12 +296,12 @@ def _make_taskaware_classification_dataset( is_supervised = isinstance(dataset, TaskAwareSupervisedClassificationDataset) - transform_gs = _init_transform_groups( - transform_groups, - transform, - target_transform, - initial_transform_group, - dataset, + transform_gs = TransformGroups.create( + transform_groups=transform_groups, + transform=transform, + target_transform=target_transform, + initial_transform_group=initial_transform_group, + dataset=dataset, ) targets_data: Optional[DataAttribute[TTargetType]] = _init_targets(dataset, targets) task_labels_data: Optional[DataAttribute[int]] = _init_task_labels( @@ -521,12 +521,12 @@ def _taskaware_classification_subset( dataset, task_labels, check_shape=False ) - transform_gs = _init_transform_groups( - transform_groups, - transform, - target_transform, - initial_transform_group, - dataset, + transform_gs = TransformGroups.create( + transform_groups=transform_groups, + transform=transform, + target_transform=target_transform, + initial_transform_group=initial_transform_group, + dataset=dataset, ) if initial_transform_group is not None and isinstance(dataset, AvalancheDataset): @@ -695,12 +695,12 @@ def _make_taskaware_tensor_classification_dataset( tts.append(tt) dataset = _TensorClassificationDataset(*tts) - transform_gs = _init_transform_groups( - transform_groups, - transform, - target_transform, - initial_transform_group, - dataset, + transform_gs = TransformGroups.create( + transform_groups=transform_groups, + transform=transform, + target_transform=target_transform, + initial_transform_group=initial_transform_group, + dataset=dataset, ) targets_data = _init_targets(dataset, targets) task_labels_data = _init_task_labels(dataset, task_labels) @@ -896,12 +896,13 @@ def _concat_taskaware_classification_datasets( dds.append(dd) if len(dds) > 0: - transform_groups_obj = _init_transform_groups( - transform_groups, - transform, - target_transform, - initial_transform_group, - dds[0], + dataset = dds[0] + transform_groups_obj = TransformGroups.create( + transform_groups=transform_groups, + transform=transform, + target_transform=target_transform, + initial_transform_group=initial_transform_group, + dataset=dataset, ) else: transform_groups_obj = None @@ -1116,6 +1117,7 @@ def _as_taskaware_supervised_classification_dataset( __all__ = [ + "ClassificationDataset", "SupportedDataset", "TaskAwareClassificationDataset", "TaskAwareSupervisedClassificationDataset", diff --git a/avalanche/benchmarks/utils/data.py 
b/avalanche/benchmarks/utils/data.py index 8dfb3ccba..9a20ed3da 100644 --- a/avalanche/benchmarks/utils/data.py +++ b/avalanche/benchmarks/utils/data.py @@ -98,7 +98,7 @@ def __init__( ): """Creates a ``AvalancheDataset`` instance. - :param dataset: Original dataset. Beware that + :param datasets: Original dataset. Beware that AvalancheDataset will not overwrite transformations already applied by this dataset. :param transform_groups: Avalanche transform groups. diff --git a/avalanche/benchmarks/utils/dataset_definitions.py b/avalanche/benchmarks/utils/dataset_definitions.py index de9fa05da..0dc63d9da 100644 --- a/avalanche/benchmarks/utils/dataset_definitions.py +++ b/avalanche/benchmarks/utils/dataset_definitions.py @@ -23,7 +23,7 @@ # # That is, accept ISupportedClassificationDataset as parameter to # functions/constructors (when possible), but always expose/return instances of -# ClassificationDataset to the, user (no matter what). The main difference is +# ClassificationDataset to the user (no matter what). The main difference is # that ClassificationDataset is a subclass of the PyTorch Dataset while # ISupportedClassificationDataset is just a Protocol. This will allow the user # to pass any custom dataset while receiving Dataset subclasses as outputs at diff --git a/avalanche/benchmarks/utils/detection_dataset.py b/avalanche/benchmarks/utils/detection_dataset.py index 6c5efb43f..8f2c00e9a 100644 --- a/avalanche/benchmarks/utils/detection_dataset.py +++ b/avalanche/benchmarks/utils/detection_dataset.py @@ -38,7 +38,6 @@ from avalanche.benchmarks.utils.utils import ( TaskSet, _init_task_labels, - _init_transform_groups, _split_user_def_targets, _split_user_def_task_label, _traverse_supported_dataset, @@ -272,12 +271,12 @@ def make_detection_dataset( is_supervised = isinstance(dataset, SupervisedDetectionDataset) - transform_gs = _init_transform_groups( - transform_groups, - transform, - target_transform, - initial_transform_group, - dataset, + transform_gs = TransformGroups.create( + transform_groups=transform_groups, + transform=transform, + target_transform=target_transform, + initial_transform_group=initial_transform_group, + dataset=dataset, ) targets_data: Optional[DataAttribute[TTargetType]] = _init_targets(dataset, targets) task_labels_data: Optional[DataAttribute[int]] = _init_task_labels( @@ -508,12 +507,12 @@ def detection_subset( del task_labels del targets - transform_gs = _init_transform_groups( - transform_groups, - transform, - target_transform, - initial_transform_group, - dataset, + transform_gs = TransformGroups.create( + transform_groups=transform_groups, + transform=transform, + target_transform=target_transform, + initial_transform_group=initial_transform_group, + dataset=dataset, ) if initial_transform_group is not None and isinstance(dataset, AvalancheDataset): @@ -741,12 +740,13 @@ def concat_detection_datasets( ####################################### # TRANSFORMATION GROUPS ####################################### - transform_groups_obj = _init_transform_groups( - transform_groups, - transform, - target_transform, - initial_transform_group, - dds[0], + dataset = dds[0] + transform_groups_obj = TransformGroups.create( + transform_groups=transform_groups, + transform=transform, + target_transform=target_transform, + initial_transform_group=initial_transform_group, + dataset=dataset, ) # Find common "current_group" or use "train" diff --git a/avalanche/benchmarks/utils/transform_groups.py b/avalanche/benchmarks/utils/transform_groups.py index 0f9004df3..d24e119e2 
100644 --- a/avalanche/benchmarks/utils/transform_groups.py +++ b/avalanche/benchmarks/utils/transform_groups.py @@ -28,9 +28,11 @@ Union, Callable, Sequence, - Protocol, + Protocol, Self, ) +from avalanche.benchmarks import AvalancheDataset +from avalanche.benchmarks.utils import _check_groups_dict_format from avalanche.benchmarks.utils.transforms import ( MultiParamCompose, TupleTransform, @@ -109,6 +111,78 @@ def __init__( if "eval" not in self.transform_groups: self.transform_groups["eval"] = None + @classmethod + def create( + cls, + transform_groups: Optional[Mapping[str, TransformGroupDef]], + transform: Optional[XTransform], + target_transform: Optional[YTransform], + initial_transform_group: Optional[str], + dataset, + ) -> Optional[Self]: + """ + Initializes the transform groups for the given dataset. + + This internal utility is commonly used to manage the transformation + definitions coming from the user-facing API. The user may want to + define transformations in a more classic (and simple) way by + passing a single `transform`, or in a more elaborate way by + passing a dictionary of groups (`transform_groups`). + + :param transform_groups: The transform groups to use as a dictionary + (group_name -> group). Can be None. Mutually exclusive with + `targets` and `target_transform` + :param transform: The transformation for the X value. Can be None. + :param target_transform: The transformation for the Y value. Can be None. + :param initial_transform_group: The name of the initial group. + If None, 'train' will be used. + :param dataset: The avalanche dataset, used only to obtain the name of + the initial transformations groups if `initial_transform_group` is + None. + :returns: a :class:`TransformGroups` instance if any transformation + was passed, else None. + """ + if transform_groups is not None and ( + transform is not None or target_transform is not None + ): + raise ValueError( + "transform_groups can't be used with transform" + "and target_transform values" + ) + + if transform_groups is not None: + _check_groups_dict_format(transform_groups) + + if initial_transform_group is None: + # Detect from the input dataset. 
If not an AvalancheDataset then + # use 'train' as the initial transform group + if ( + isinstance(dataset, AvalancheDataset) + and dataset._flat_data._transform_groups is not None + ): + tgs = dataset._flat_data._transform_groups + initial_transform_group = tgs.current_group + else: + initial_transform_group = "train" + + if transform_groups is None: + if target_transform is None and transform is None: + tgs = None + else: + tgs = TransformGroups( + { + "train": (transform, target_transform), + "eval": (transform, target_transform), + }, + current_group=initial_transform_group, + ) + else: + tgs = TransformGroups( + transform_groups, + current_group=initial_transform_group, + ) + return tgs + def __getitem__(self, item): return self.transform_groups[item] diff --git a/avalanche/benchmarks/utils/utils.py b/avalanche/benchmarks/utils/utils.py index 17df10fdc..628930b9b 100644 --- a/avalanche/benchmarks/utils/utils.py +++ b/avalanche/benchmarks/utils/utils.py @@ -45,14 +45,12 @@ ) from .flat_data import ConstantSequence from .transform_groups import ( - TransformGroupDef, TransformGroups, - XTransform, - YTransform, ) if TYPE_CHECKING: - from .classification_dataset import TaskAwareClassificationDataset + # Avoid cyclic imports + from .classification_dataset import ClassificationDataset, TaskAwareClassificationDataset T_co = TypeVar("T_co", covariant=True) TAvalancheDataset = TypeVar("TAvalancheDataset", bound="AvalancheDataset") @@ -185,8 +183,9 @@ def as_avalanche_dataset( def as_classification_dataset( dataset: ISupportedClassificationDataset[T_co], transform_groups: Optional[TransformGroups] = None, -) -> "TaskAwareClassificationDataset": - """Converts a dataset with a `targets` field into an Avalanche ClassificationDataset.""" +) -> "ClassificationDataset": + """Converts a dataset with a `targets` field into a ClassificationDataset.""" + # Avoid cyclic imports from avalanche.benchmarks.utils.classification_dataset import ClassificationDataset if isinstance(dataset, ClassificationDataset): @@ -200,6 +199,7 @@ def as_classification_dataset( def as_taskaware_classification_dataset( dataset: ISupportedClassificationDataset[T_co], ) -> "TaskAwareClassificationDataset": + # Avoid cyclic imports from avalanche.benchmarks.utils.classification_dataset import ( TaskAwareClassificationDataset, ) @@ -330,12 +330,7 @@ def _traverse_supported_dataset( datasets_len = [] recursion_result = [] - all_size = 0 - for c_dataset in dataset.datasets: - len_dataset = len(c_dataset) - datasets_len.append(len_dataset) - all_size += len_dataset - + all_size = len(dataset) for subset_idx in indices: dataset_idx, pattern_idx = find_list_from_index( subset_idx, datasets_len, all_size @@ -374,7 +369,7 @@ def _init_task_labels( Initializes the task label list (one for each pattern in the dataset). Precedence is given to the values contained in `task_labels` if passed. - Otherwisem the elements will be retrieved from the dataset itself by + Otherwise the elements will be retrieved from the dataset itself by traversing it and looking at the `targets_task_labels` field. :param dataset: The dataset for which the task labels list must be @@ -455,74 +450,6 @@ def _select_task_labels( return found_task_labels -def _init_transform_groups( - transform_groups: Optional[Mapping[str, TransformGroupDef]], - transform: Optional[XTransform], - target_transform: Optional[YTransform], - initial_transform_group: Optional[str], - dataset, -) -> Optional[TransformGroups]: - """ - Initializes the transform groups for the given dataset. 
- - This internal utility is commonly used to manage the transformation - defintions coming from the user-facing API. The user may want to - define transformations in a more classic (and simple) way by - passing a single `transform`, or in a more elaborate way by - passing a dictionary of groups (`transform_groups`). - - :param transform_groups: The transform groups to use as a dictionary - (group_name -> group). Can be None. Mutually exclusive with - `targets` and `target_transform` - :param transform: The transformation for the X value. Can be None. - :param target_transform: The transformation for the Y value. Can be None. - :param initial_transform_group: The name of the initial group. - If None, 'train' will be used. - :param dataset: The avalanche dataset, used only to obtain the name of - the initial transformations groups if `initial_transform_group` is - None. - :returns: a :class:`TransformGroups` instance if any transformation - was passed, else None. - """ - if transform_groups is not None and ( - transform is not None or target_transform is not None - ): - raise ValueError( - "transform_groups can't be used with transform" - "and target_transform values" - ) - - if transform_groups is not None: - _check_groups_dict_format(transform_groups) - - if initial_transform_group is None: - # Detect from the input dataset. If not an AvalancheDataset then - # use 'train' as the initial transform group - if ( - isinstance(dataset, AvalancheDataset) - and dataset._flat_data._transform_groups is not None - ): - tgs = dataset._flat_data._transform_groups - initial_transform_group = tgs.current_group - else: - initial_transform_group = "train" - - if transform_groups is None: - if target_transform is None and transform is None: - tgs = None - else: - tgs = TransformGroups( - { - "train": (transform, target_transform), - "eval": (transform, target_transform), - }, - current_group=initial_transform_group, - ) - else: - tgs = TransformGroups(transform_groups, current_group=initial_transform_group) - return tgs - - def _check_groups_dict_format(groups_dict): # The original groups_dict must be convertible to native Python dict groups_dict = dict(groups_dict) From f9936db06172849eeffc3c6843d1d33cf2cf89c1 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Mon, 25 Mar 2024 21:04:42 +0100 Subject: [PATCH 03/30] typo --- avalanche/benchmarks/utils/transform_groups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avalanche/benchmarks/utils/transform_groups.py b/avalanche/benchmarks/utils/transform_groups.py index d24e119e2..4ab60bc24 100644 --- a/avalanche/benchmarks/utils/transform_groups.py +++ b/avalanche/benchmarks/utils/transform_groups.py @@ -73,7 +73,7 @@ class TransformGroups: """Transformation groups for Avalanche datasets. TransformGroups supports preprocessing and augmentation pipelines for - Avalanche datasets. Transfomations are separated into groups (e.g. `train` + Avalanche datasets. Transformations are separated into groups (e.g. `train` transforms and `test` transforms), that can be easily switched using the `with_transform` method. 
""" From 471935b67fe2f32b402e74980bdba5fcd7bc2400 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Mon, 25 Mar 2024 23:51:34 +0100 Subject: [PATCH 04/30] revert e2a3bd35, enable transform groups kwargs for as_avalanche_dataset, as_taskaware_classification_dataset --- .../utils/classification_dataset.py | 72 ++++--- .../benchmarks/utils/detection_dataset.py | 7 +- .../benchmarks/utils/transform_groups.py | 78 +------ avalanche/benchmarks/utils/utils.py | 191 ++++++++++++------ 4 files changed, 172 insertions(+), 176 deletions(-) diff --git a/avalanche/benchmarks/utils/classification_dataset.py b/avalanche/benchmarks/utils/classification_dataset.py index 848f15f36..0da356f1c 100644 --- a/avalanche/benchmarks/utils/classification_dataset.py +++ b/avalanche/benchmarks/utils/classification_dataset.py @@ -18,38 +18,6 @@ """ from functools import partial -import torch -from torch.utils.data.dataset import Subset, ConcatDataset, TensorDataset - -from avalanche.benchmarks.utils.utils import ( - TaskSet, - _count_unique, - find_common_transforms_group, - _init_task_labels, - _split_user_def_targets, - _split_user_def_task_label, - _traverse_supported_dataset, -) - -from avalanche.benchmarks.utils.data import AvalancheDataset -from avalanche.benchmarks.utils.transform_groups import ( - TransformGroups, - TransformGroupDef, - DefaultTransformGroups, - XTransform, - YTransform, -) -from avalanche.benchmarks.utils.data_attribute import DataAttribute -from avalanche.benchmarks.utils.dataset_utils import ( - SubSequence, -) -from avalanche.benchmarks.utils.flat_data import ConstantSequence -from avalanche.benchmarks.utils.dataset_definitions import ( - ISupportedClassificationDataset, - ITensorDataset, - IDatasetWithTargets, -) - from typing import ( List, Any, @@ -64,6 +32,36 @@ overload, ) +import torch +from torch.utils.data.dataset import Subset, ConcatDataset, TensorDataset + +from avalanche.benchmarks.utils.data import AvalancheDataset +from avalanche.benchmarks.utils.data_attribute import DataAttribute +from avalanche.benchmarks.utils.dataset_definitions import ( + ISupportedClassificationDataset, + ITensorDataset, + IDatasetWithTargets, +) +from avalanche.benchmarks.utils.dataset_utils import ( + SubSequence, +) +from avalanche.benchmarks.utils.flat_data import ConstantSequence +from avalanche.benchmarks.utils.transform_groups import ( + TransformGroupDef, + DefaultTransformGroups, + XTransform, + YTransform, +) +from avalanche.benchmarks.utils.utils import ( + TaskSet, + _count_unique, + find_common_transforms_group, + _init_task_labels, + _init_transform_groups, + _split_user_def_targets, + _split_user_def_task_label, + _traverse_supported_dataset, +) T_co = TypeVar("T_co", covariant=True) TAvalancheDataset = TypeVar("TAvalancheDataset", bound="AvalancheDataset") @@ -226,7 +224,7 @@ def _make_taskaware_classification_dataset( slicing and advanced indexing and it also contains useful fields as `targets`, which contains the pattern labels, and `targets_task_labels`, which contains the pattern task labels. The `task_set` field can be used to - obtain a the subset of patterns labeled with a given task label. + obtain a subset of patterns labeled with a given task label. This dataset can also be used to apply several advanced operations involving transformations. 
For instance, it allows the user to add and replace @@ -296,7 +294,7 @@ def _make_taskaware_classification_dataset( is_supervised = isinstance(dataset, TaskAwareSupervisedClassificationDataset) - transform_gs = TransformGroups.create( + transform_gs = _init_transform_groups( transform_groups=transform_groups, transform=transform, target_transform=target_transform, @@ -521,7 +519,7 @@ def _taskaware_classification_subset( dataset, task_labels, check_shape=False ) - transform_gs = TransformGroups.create( + transform_gs = _init_transform_groups( transform_groups=transform_groups, transform=transform, target_transform=target_transform, @@ -695,7 +693,7 @@ def _make_taskaware_tensor_classification_dataset( tts.append(tt) dataset = _TensorClassificationDataset(*tts) - transform_gs = TransformGroups.create( + transform_gs = _init_transform_groups( transform_groups=transform_groups, transform=transform, target_transform=target_transform, @@ -897,7 +895,7 @@ def _concat_taskaware_classification_datasets( if len(dds) > 0: dataset = dds[0] - transform_groups_obj = TransformGroups.create( + transform_groups_obj = _init_transform_groups( transform_groups=transform_groups, transform=transform, target_transform=target_transform, diff --git a/avalanche/benchmarks/utils/detection_dataset.py b/avalanche/benchmarks/utils/detection_dataset.py index 8f2c00e9a..ad897bbdf 100644 --- a/avalanche/benchmarks/utils/detection_dataset.py +++ b/avalanche/benchmarks/utils/detection_dataset.py @@ -38,6 +38,7 @@ from avalanche.benchmarks.utils.utils import ( TaskSet, _init_task_labels, + _init_transform_groups, _split_user_def_targets, _split_user_def_task_label, _traverse_supported_dataset, @@ -271,7 +272,7 @@ def make_detection_dataset( is_supervised = isinstance(dataset, SupervisedDetectionDataset) - transform_gs = TransformGroups.create( + transform_gs = _init_transform_groups( transform_groups=transform_groups, transform=transform, target_transform=target_transform, @@ -507,7 +508,7 @@ def detection_subset( del task_labels del targets - transform_gs = TransformGroups.create( + transform_gs = _init_transform_groups( transform_groups=transform_groups, transform=transform, target_transform=target_transform, @@ -741,7 +742,7 @@ def concat_detection_datasets( # TRANSFORMATION GROUPS ####################################### dataset = dds[0] - transform_groups_obj = TransformGroups.create( + transform_groups_obj = _init_transform_groups( transform_groups=transform_groups, transform=transform, target_transform=target_transform, diff --git a/avalanche/benchmarks/utils/transform_groups.py b/avalanche/benchmarks/utils/transform_groups.py index d24e119e2..5293d4bb3 100644 --- a/avalanche/benchmarks/utils/transform_groups.py +++ b/avalanche/benchmarks/utils/transform_groups.py @@ -28,11 +28,9 @@ Union, Callable, Sequence, - Protocol, Self, + Protocol, ) -from avalanche.benchmarks import AvalancheDataset -from avalanche.benchmarks.utils import _check_groups_dict_format from avalanche.benchmarks.utils.transforms import ( MultiParamCompose, TupleTransform, @@ -73,7 +71,7 @@ class TransformGroups: """Transformation groups for Avalanche datasets. TransformGroups supports preprocessing and augmentation pipelines for - Avalanche datasets. Transfomations are separated into groups (e.g. `train` + Avalanche datasets. Transformations are separated into groups (e.g. `train` transforms and `test` transforms), that can be easily switched using the `with_transform` method. 
""" @@ -111,78 +109,6 @@ def __init__( if "eval" not in self.transform_groups: self.transform_groups["eval"] = None - @classmethod - def create( - cls, - transform_groups: Optional[Mapping[str, TransformGroupDef]], - transform: Optional[XTransform], - target_transform: Optional[YTransform], - initial_transform_group: Optional[str], - dataset, - ) -> Optional[Self]: - """ - Initializes the transform groups for the given dataset. - - This internal utility is commonly used to manage the transformation - definitions coming from the user-facing API. The user may want to - define transformations in a more classic (and simple) way by - passing a single `transform`, or in a more elaborate way by - passing a dictionary of groups (`transform_groups`). - - :param transform_groups: The transform groups to use as a dictionary - (group_name -> group). Can be None. Mutually exclusive with - `targets` and `target_transform` - :param transform: The transformation for the X value. Can be None. - :param target_transform: The transformation for the Y value. Can be None. - :param initial_transform_group: The name of the initial group. - If None, 'train' will be used. - :param dataset: The avalanche dataset, used only to obtain the name of - the initial transformations groups if `initial_transform_group` is - None. - :returns: a :class:`TransformGroups` instance if any transformation - was passed, else None. - """ - if transform_groups is not None and ( - transform is not None or target_transform is not None - ): - raise ValueError( - "transform_groups can't be used with transform" - "and target_transform values" - ) - - if transform_groups is not None: - _check_groups_dict_format(transform_groups) - - if initial_transform_group is None: - # Detect from the input dataset. If not an AvalancheDataset then - # use 'train' as the initial transform group - if ( - isinstance(dataset, AvalancheDataset) - and dataset._flat_data._transform_groups is not None - ): - tgs = dataset._flat_data._transform_groups - initial_transform_group = tgs.current_group - else: - initial_transform_group = "train" - - if transform_groups is None: - if target_transform is None and transform is None: - tgs = None - else: - tgs = TransformGroups( - { - "train": (transform, target_transform), - "eval": (transform, target_transform), - }, - current_group=initial_transform_group, - ) - else: - tgs = TransformGroups( - transform_groups, - current_group=initial_transform_group, - ) - return tgs - def __getitem__(self, item): return self.transform_groups[item] diff --git a/avalanche/benchmarks/utils/utils.py b/avalanche/benchmarks/utils/utils.py index 628930b9b..e11fa1544 100644 --- a/avalanche/benchmarks/utils/utils.py +++ b/avalanche/benchmarks/utils/utils.py @@ -11,6 +11,7 @@ """ Common benchmarks/environments utils. 
""" +import warnings from collections import OrderedDict, defaultdict, deque from typing import ( TYPE_CHECKING, @@ -28,7 +29,6 @@ Dict, SupportsInt, ) -import warnings import torch from torch import Tensor @@ -45,13 +45,15 @@ ) from .flat_data import ConstantSequence from .transform_groups import ( - TransformGroups, + TransformGroups, XTransform, YTransform, TransformGroupDef, ) if TYPE_CHECKING: # Avoid cyclic imports from .classification_dataset import ClassificationDataset, TaskAwareClassificationDataset +Y = TypeVar("Y") +T = TypeVar("T") T_co = TypeVar("T_co", covariant=True) TAvalancheDataset = TypeVar("TAvalancheDataset", bound="AvalancheDataset") @@ -71,56 +73,6 @@ def tensor_as_list(sequence) -> List: return list(sequence) -def _indexes_grouped_by_classes( - targets: Sequence[int], - patterns_indexes: Union[None, Sequence[int]], - sort_indexes: bool = True, - sort_classes: bool = True, -) -> Union[List[int], None]: - result_per_class: Dict[int, List[int]] = OrderedDict() - result: List[int] = [] - - indexes_was_none = patterns_indexes is None - - if patterns_indexes is not None: - patterns_indexes = tensor_as_list(patterns_indexes) - else: - patterns_indexes = list(range(len(targets))) - - targets = tensor_as_list(targets) - - # Consider that result_per_class is an OrderedDict - # This means that, if sort_classes is True, the next for statement - # will initialize "result_per_class" in sorted order which in turn means - # that patterns will be ordered by ascending class ID. - classes = torch.unique(torch.as_tensor(targets), sorted=sort_classes).tolist() - - for class_id in classes: - result_per_class[class_id] = [] - - # Stores each pattern index in the appropriate class list - for idx in patterns_indexes: - result_per_class[targets[idx]].append(idx) - - # Concatenate all the pattern indexes - for class_id in classes: - if sort_indexes: - result_per_class[class_id].sort() - result.extend(result_per_class[class_id]) - - if result == patterns_indexes and indexes_was_none: - # Result is [0, 1, 2, ..., N] and patterns_indexes was originally None - # This means that the user tried to obtain a full Dataset - # (indexes_was_none) only ordered according to the sort_indexes and - # sort_classes parameters. However, sort_indexes+sort_classes returned - # the plain pattern sequence as it already is. So the original Dataset - # already satisfies the sort_indexes+sort_classes constraints. - # By returning None, we communicate that the Dataset can be taken as-is. 
- return None - - return result - - def grouped_and_ordered_indexes( targets: Sequence[int], patterns_indexes: Union[None, Sequence[int]], @@ -174,15 +126,17 @@ def grouped_and_ordered_indexes( def as_avalanche_dataset( dataset: ISupportedClassificationDataset[T_co], + **kwargs, ) -> AvalancheDataset: if isinstance(dataset, AvalancheDataset): return dataset - return AvalancheDataset([dataset]) + transform_groups = _init_transform_groups(**kwargs) + return AvalancheDataset([dataset], transform_groups=transform_groups) def as_classification_dataset( dataset: ISupportedClassificationDataset[T_co], - transform_groups: Optional[TransformGroups] = None, + **kwargs, ) -> "ClassificationDataset": """Converts a dataset with a `targets` field into a ClassificationDataset.""" # Avoid cyclic imports @@ -190,6 +144,7 @@ def as_classification_dataset( if isinstance(dataset, ClassificationDataset): return dataset + transform_groups = _init_transform_groups(**kwargs) da = DataAttribute(dataset.targets, "targets") return ClassificationDataset( [dataset], transform_groups=transform_groups, data_attributes=[da] @@ -198,6 +153,7 @@ def as_classification_dataset( def as_taskaware_classification_dataset( dataset: ISupportedClassificationDataset[T_co], + **kwargs, ) -> "TaskAwareClassificationDataset": # Avoid cyclic imports from avalanche.benchmarks.utils.classification_dataset import ( @@ -206,7 +162,58 @@ def as_taskaware_classification_dataset( if isinstance(dataset, TaskAwareClassificationDataset): return dataset - return TaskAwareClassificationDataset([dataset]) + transform_groups = _init_transform_groups(**kwargs) + return TaskAwareClassificationDataset([dataset], transform_groups=transform_groups) + + +def _indexes_grouped_by_classes( + targets: Sequence[int], + patterns_indexes: Union[None, Sequence[int]], + sort_indexes: bool = True, + sort_classes: bool = True, +) -> Union[List[int], None]: + result_per_class: Dict[int, List[int]] = OrderedDict() + result: List[int] = [] + + indexes_was_none = patterns_indexes is None + + if patterns_indexes is not None: + patterns_indexes = tensor_as_list(patterns_indexes) + else: + patterns_indexes = list(range(len(targets))) + + targets = tensor_as_list(targets) + + # Consider that result_per_class is an OrderedDict + # This means that, if sort_classes is True, the next for statement + # will initialize "result_per_class" in sorted order which in turn means + # that patterns will be ordered by ascending class ID. + classes = torch.unique(torch.as_tensor(targets), sorted=sort_classes).tolist() + + for class_id in classes: + result_per_class[class_id] = [] + + # Stores each pattern index in the appropriate class list + for idx in patterns_indexes: + result_per_class[targets[idx]].append(idx) + + # Concatenate all the pattern indexes + for class_id in classes: + if sort_indexes: + result_per_class[class_id].sort() + result.extend(result_per_class[class_id]) + + if result == patterns_indexes and indexes_was_none: + # Result is [0, 1, 2, ..., N] and patterns_indexes was originally None + # This means that the user tried to obtain a full Dataset + # (indexes_was_none) only ordered according to the sort_indexes and + # sort_classes parameters. However, sort_indexes+sort_classes returned + # the plain pattern sequence as it already is. So the original Dataset + # already satisfies the sort_indexes+sort_classes constraints. + # By returning None, we communicate that the Dataset can be taken as-is. 
+ return None + + return result def _count_unique(*sequences: Sequence[SupportsInt]): @@ -268,10 +275,6 @@ def find_common_transforms_group( return initial_transform_group -Y = TypeVar("Y") -T = TypeVar("T") - - def _traverse_supported_dataset( dataset: Y, values_selector: Callable[[Y, Optional[List[int]]], Optional[Sequence[T]]], @@ -369,7 +372,7 @@ def _init_task_labels( Initializes the task label list (one for each pattern in the dataset). Precedence is given to the values contained in `task_labels` if passed. - Otherwise the elements will be retrieved from the dataset itself by + Otherwise, the elements will be retrieved from the dataset itself by traversing it and looking at the `targets_task_labels` field. :param dataset: The dataset for which the task labels list must be @@ -450,6 +453,74 @@ def _select_task_labels( return found_task_labels +def _init_transform_groups( + transform_groups: Optional[Mapping[str, TransformGroupDef]], + transform: Optional[XTransform], + target_transform: Optional[YTransform], + initial_transform_group: Optional[str], + dataset, +) -> Optional[TransformGroups]: + """ + Initializes the transform groups for the given dataset. + + This internal utility is commonly used to manage the transformation + definitions coming from the user-facing API. The user may want to + define transformations in a more classic (and simple) way by + passing a single `transform`, or in a more elaborate way by + passing a dictionary of groups (`transform_groups`). + + :param transform_groups: The transform groups to use as a dictionary + (group_name -> group). Can be None. Mutually exclusive with + `targets` and `target_transform` + :param transform: The transformation for the X value. Can be None. + :param target_transform: The transformation for the Y value. Can be None. + :param initial_transform_group: The name of the initial group. + If None, 'train' will be used. + :param dataset: The avalanche dataset, used only to obtain the name of + the initial transformations groups if `initial_transform_group` is + None. + :returns: a :class:`TransformGroups` instance if any transformation + was passed, else None. + """ + if transform_groups is not None and ( + transform is not None or target_transform is not None + ): + raise ValueError( + "transform_groups can't be used with transform" + "and target_transform values" + ) + + if transform_groups is not None: + _check_groups_dict_format(transform_groups) + + if initial_transform_group is None: + # Detect from the input dataset. 
If not an AvalancheDataset then + # use 'train' as the initial transform group + if ( + isinstance(dataset, AvalancheDataset) + and dataset._flat_data._transform_groups is not None + ): + tgs = dataset._flat_data._transform_groups + initial_transform_group = tgs.current_group + else: + initial_transform_group = "train" + + if transform_groups is None: + if target_transform is None and transform is None: + tgs = None + else: + tgs = TransformGroups( + { + "train": (transform, target_transform), + "eval": (transform, target_transform), + }, + current_group=initial_transform_group, + ) + else: + tgs = TransformGroups(transform_groups, current_group=initial_transform_group) + return tgs + + def _check_groups_dict_format(groups_dict): # The original groups_dict must be convertible to native Python dict groups_dict = dict(groups_dict) From f407b7a2b9695e1b4d7859107520000e054f33e0 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Sun, 12 May 2024 21:35:45 +0200 Subject: [PATCH 05/30] typos, fix benchmark example code --- .../scenarios/deprecated/generators.py | 1 + .../deprecated/new_classes/nc_scenario.py | 2 +- avalanche/core.py | 6 ++--- .../training/supervised/strategy_wrappers.py | 2 +- .../training/templates/common_templates.py | 2 +- .../05_evaluation.md | 22 +++++++++---------- .../from-zero-to-hero-tutorial/06_loggers.md | 4 ++-- .../07_putting-all-together.md | 2 +- .../learn-avalanche-in-5-minutes.md | 14 ++++++------ .../03_benchmarks.ipynb | 22 +++++++++++-------- .../05_evaluation.ipynb | 2 +- .../06_loggers.ipynb | 2 +- .../07_putting-all-together.ipynb | 2 +- .../learn-avalanche-in-5-minutes.ipynb | 2 +- 14 files changed, 45 insertions(+), 40 deletions(-) diff --git a/avalanche/benchmarks/scenarios/deprecated/generators.py b/avalanche/benchmarks/scenarios/deprecated/generators.py index cb2fdf630..e411cb3e7 100644 --- a/avalanche/benchmarks/scenarios/deprecated/generators.py +++ b/avalanche/benchmarks/scenarios/deprecated/generators.py @@ -72,6 +72,7 @@ TCLDataset = TypeVar("TCLDataset", bound="AvalancheDataset") +# TODO: Nomenclature: experience vs task def nc_benchmark( train_dataset: Union[Sequence[SupportedDataset], SupportedDataset], test_dataset: Union[Sequence[SupportedDataset], SupportedDataset], diff --git a/avalanche/benchmarks/scenarios/deprecated/new_classes/nc_scenario.py b/avalanche/benchmarks/scenarios/deprecated/new_classes/nc_scenario.py index a5509b18d..4430f0ea4 100644 --- a/avalanche/benchmarks/scenarios/deprecated/new_classes/nc_scenario.py +++ b/avalanche/benchmarks/scenarios/deprecated/new_classes/nc_scenario.py @@ -556,7 +556,7 @@ class NCExperience(ClassificationExperience[TaskAwareSupervisedClassificationDat def __init__(self, origin_stream: NCStream, current_experience: int): """ Creates a ``NCExperience`` instance given the stream from this - experience was taken and and the current experience ID. + experience was taken and the current experience ID. :param origin_stream: The stream from which this experience was obtained. 
diff --git a/avalanche/core.py b/avalanche/core.py index 0d2364359..002f818cb 100644 --- a/avalanche/core.py +++ b/avalanche/core.py @@ -149,13 +149,13 @@ def after_eval_iteration(self, strategy: Template, *args, **kwargs) -> Any: def before_train_dataset_adaptation( self, strategy: Template, *args, **kwargs ) -> Any: - """Called before `train_dataset_adapatation` by the `BaseTemplate`.""" + """Called before `train_dataset_adaptation` by the `BaseTemplate`.""" pass def after_train_dataset_adaptation( self, strategy: Template, *args, **kwargs ) -> Any: - """Called after `train_dataset_adapatation` by the `BaseTemplate`.""" + """Called after `train_dataset_adaptation` by the `BaseTemplate`.""" pass def before_eval_dataset_adaptation( @@ -177,7 +177,7 @@ class SupervisedPlugin(BaseSGDPlugin[Template], ABC): def __init__(self): """ - Inizializes an instance of a supervised plugin. + Initializes an instance of a supervised plugin. """ super().__init__() diff --git a/avalanche/training/supervised/strategy_wrappers.py b/avalanche/training/supervised/strategy_wrappers.py index 541d500b2..cea506950 100644 --- a/avalanche/training/supervised/strategy_wrappers.py +++ b/avalanche/training/supervised/strategy_wrappers.py @@ -56,7 +56,7 @@ class Naive(SupervisedTemplate): """Naive finetuning. The simplest (and least effective) Continual Learning strategy. Naive just - incrementally fine tunes a single model without employing any method + incrementally fine-tunes a single model without employing any method to contrast the catastrophic forgetting of previous knowledge. This strategy does not use task identities. diff --git a/avalanche/training/templates/common_templates.py b/avalanche/training/templates/common_templates.py index 8405c5a6f..2328d4eab 100644 --- a/avalanche/training/templates/common_templates.py +++ b/avalanche/training/templates/common_templates.py @@ -46,7 +46,7 @@ class SupervisedTemplate( This strategy supports several continual learning scenarios: * class-incremental scenarios (no task labels) - * multi-task scenarios, where task labels are provided) + * multi-task scenarios, where task labels are provided * multi-incremental scenarios, where the same task may be revisited The exact scenario depends on the data stream and whether it provides diff --git a/docs/gitbook/from-zero-to-hero-tutorial/05_evaluation.md b/docs/gitbook/from-zero-to-hero-tutorial/05_evaluation.md index 305b29547..2326f23b1 100644 --- a/docs/gitbook/from-zero-to-hero-tutorial/05_evaluation.md +++ b/docs/gitbook/from-zero-to-hero-tutorial/05_evaluation.md @@ -23,7 +23,7 @@ Each metric comes with a standalone class and a set of plugin classes aimed at e #### Standalone metric -As an example, the standalone `Accuracy` class can be used to monitor the average accuracy over a stream of `` pairs. The class provides an `update` method to update the current average accuracy, a `result` method to print the current average accuracy and a `reset` method to set the current average accuracy to zero. The call to `result`does not change the metric state. +As an example, the standalone `Accuracy` class can be used to monitor the average accuracy over a stream of `` pairs. The class provides an `update` method to update the current average accuracy, a `result` method to print the current average accuracy and a `reset` method to set the current average accuracy to zero. The call to `result`does not change the metric state. The `TaskAwareAccuracy` metric keeps separate accuracy counters for different task labels. 
As such, it requires the `task_labels` parameter, which specifies which task is associated with the current patterns. The metric returns a dictionary mapping task labels to accuracy values. @@ -71,7 +71,7 @@ print("Average Accuracy: ", acc) # output 0.5 for task 0 task_label = 1 predicted_y = torch.tensor([1,2]).float() acc_metric.update(real_y, predicted_y, task_label) -acc = acc_metric.result() +acc = acc_metric.result() print("Average Accuracy: ", acc) # output 0.75 for task 0 and 1.0 for task 1 task_label = 0 @@ -111,7 +111,7 @@ The **Evaluation Plugin** is the object in charge of configuring and controlling The Evaluation Plugin accepts as inputs the plugin metrics you want to track. In addition, you can add one or more loggers to print the metrics in different ways \(on file, on standard output, on Tensorboard...\). -It is also recommended to pass to the Evaluation Plugin the benchmark instance used in the experiment. This allows the plugin to check for consistency during metrics computation. For example, the Evaluation Plugin checks that the `strategy.eval` calls are performed on the same stream or sub-stream. Otherwise, same metric could refer to different portions of the stream. +It is also recommended to pass to the Evaluation Plugin the benchmark instance used in the experiment. This allows the plugin to check for consistency during metrics computation. For example, the Evaluation Plugin checks that the `strategy.eval` calls are performed on the same stream or sub-stream. Otherwise, same metric could refer to different portions of the stream. These checks can be configured to raise errors (stopping computation) or only warnings. @@ -134,7 +134,7 @@ model = SimpleMLP(num_classes=benchmark.n_classes) # DEFINE THE EVALUATION PLUGIN # The evaluation plugin manages the metrics computation. -# It takes as argument a list of metrics, collectes their results and returns +# It takes as argument a list of metrics, collects their results and returns # them to the strategy it is attached to. eval_plugin = EvaluationPlugin( @@ -256,8 +256,8 @@ class MyPluginMetric(PluginMetric[float]): task_labels = strategy.mb_task_id else: task_labels = task_labels[0] - - self._accuracy_metric.update(strategy.mb_output, strategy.mb_y, + + self._accuracy_metric.update(strategy.mb_output, strategy.mb_y, task_labels) def before_training_epoch(self, strategy: 'PluggableStrategy') -> None: @@ -271,8 +271,8 @@ class MyPluginMetric(PluginMetric[float]): Emit the result """ return self._package_result(strategy) - - + + def _package_result(self, strategy): """Taken from `GenericPluginMetric`, check that class out!""" metric_value = self.accuracy_metric.result() @@ -303,9 +303,9 @@ class MyPluginMetric(PluginMetric[float]): ## Accessing metric values -If you want to access all the metrics computed during training and evaluation, you have to make sure that `collect_all=True` is set when creating the `EvaluationPlugin` (default option is `True`). This option maintains an updated version of all metric results in the plugin, which can be retrieved by calling `evaluation_plugin.get_all_metrics()`. You can call this methods whenever you need the metrics. +If you want to access all the metrics computed during training and evaluation, you have to make sure that `collect_all=True` is set when creating the `EvaluationPlugin` (default option is `True`). This option maintains an updated version of all metric results in the plugin, which can be retrieved by calling `evaluation_plugin.get_all_metrics()`. 
You can call this methods whenever you need the metrics. -The result is a dictionary with full metric names as keys and a tuple of two lists as values. The first list stores all the `x` values recorded for that metric. Each `x` value represents the time step at which the corresponding metric value has been computed. The second list stores metric values associated to the corresponding `x` value. +The result is a dictionary with full metric names as keys and a tuple of two lists as values. The first list stores all the `x` values recorded for that metric. Each `x` value represents the time step at which the corresponding metric value has been computed. The second list stores metric values associated to the corresponding `x` value. ```python @@ -332,7 +332,7 @@ d = eval_plugin.get_all_metrics() d['Top1_Acc_Epoch/train_phase/train_stream/Task000'] ``` -Alternatively, the `train` and `eval` method of every `strategy` returns a dictionary storing, for each metric, the last value recorded for that metric. You can use these dictionaries to incrementally accumulate metrics. +Alternatively, the `train` and `eval` method of every `strategy` returns a dictionary storing, for each metric, the last value recorded for that metric. You can use these dictionaries to incrementally accumulate metrics. ```python diff --git a/docs/gitbook/from-zero-to-hero-tutorial/06_loggers.md b/docs/gitbook/from-zero-to-hero-tutorial/06_loggers.md index e1ca65f60..8b83a307a 100644 --- a/docs/gitbook/from-zero-to-hero-tutorial/06_loggers.md +++ b/docs/gitbook/from-zero-to-hero-tutorial/06_loggers.md @@ -28,7 +28,7 @@ _Avalanche_ at the moment supports four main Loggers: * **TensorboardLogger**: It logs all the metrics on [Tensorboard](https://www.tensorflow.org/tensorboard) in real-time. Perfect for real-time plotting. * **WandBLogger**: It leverages [Weights and Biases](https://wandb.ai/site) tools to log metrics and results on a dashboard. It requires a W&B account. -In order to keep track of when each metric value has been logged, we leverage two `global counters`, one for the training phase, one for the evaluation phase. +In order to keep track of when each metric value has been logged, we leverage two `global counters`, one for the training phase, one for the evaluation phase. You can see the `global counter` value reported in the x axis of the logged plots. Each `global counter` is an ever-increasing value which starts from 0 and it is increased by one each time a training/evaluation iteration is performed (i.e. after each training/evaluation minibatch). @@ -56,7 +56,7 @@ model = SimpleMLP(num_classes=benchmark.n_classes) # DEFINE THE EVALUATION PLUGIN and LOGGERS # The evaluation plugin manages the metrics computation. -# It takes as argument a list of metrics, collectes their results and returns +# It takes as argument a list of metrics, collects their results and returns # them to the strategy it is attached to. diff --git a/docs/gitbook/from-zero-to-hero-tutorial/07_putting-all-together.md b/docs/gitbook/from-zero-to-hero-tutorial/07_putting-all-together.md index f9b02ada2..7825a2807 100644 --- a/docs/gitbook/from-zero-to-hero-tutorial/07_putting-all-together.md +++ b/docs/gitbook/from-zero-to-hero-tutorial/07_putting-all-together.md @@ -33,7 +33,7 @@ model = SimpleMLP(num_classes=scenario.n_classes) # DEFINE THE EVALUATION PLUGIN and LOGGERS # The evaluation plugin manages the metrics computation. 
-# It takes as argument a list of metrics, collectes their results and returns +# It takes as argument a list of metrics, collects their results and returns # them to the strategy it is attached to. # log to Tensorboard diff --git a/docs/gitbook/getting-started/learn-avalanche-in-5-minutes.md b/docs/gitbook/getting-started/learn-avalanche-in-5-minutes.md index c4d222855..ac2589778 100644 --- a/docs/gitbook/getting-started/learn-avalanche-in-5-minutes.md +++ b/docs/gitbook/getting-started/learn-avalanche-in-5-minutes.md @@ -116,7 +116,7 @@ for experience in train_stream: print("Start of task ", experience.task_label) print('Classes in this task:', experience.classes_in_this_experience) - # The current Pytorch training set can be easily recovered through the + # The current Pytorch training set can be easily recovered through the # experience current_training_set = experience.dataset # ...as well as the task_label @@ -211,7 +211,7 @@ class MyStrategy(): self.criterion = criterion def train(self, experience): - # here you can implement your own training loop for each experience (i.e. + # here you can implement your own training loop for each experience (i.e. # batch or task). train_dataset = experience.dataset @@ -226,7 +226,7 @@ class MyStrategy(): pass def eval(self, experience): - # here you can implement your own eval loop for each experience (i.e. + # here you can implement your own eval loop for each experience (i.e. # batch or task). eval_dataset = experience.dataset @@ -277,8 +277,8 @@ Check out more details about what Avalanche can offer in this module following t The `evaluation` module is quite straightforward: it offers all the basic functionalities to evaluate and keep track of a continual learning experiment. -This is mostly done through the **Metrics** and the **Loggers**. The **Metrics** provide a set of classes which implements the main continual learning metrics like Accuracy, Forgetting, Memory Usage, Running Times, etc. -Metrics should be created via the utility functions (e.g. `accuracy_metrics`, `timing_metrics` and others) specifying in the arguments when those metrics should be computed (after each minibatch, epoch, experience etc...). +This is mostly done through the **Metrics** and the **Loggers**. The **Metrics** provide a set of classes which implements the main continual learning metrics like Accuracy, Forgetting, Memory Usage, Running Times, etc. +Metrics should be created via the utility functions (e.g. `accuracy_metrics`, `timing_metrics` and others) specifying in the arguments when those metrics should be computed (after each minibatch, epoch, experience etc...). The **Loggers** specify a way to report the metrics (e.g. with Tensorboard, on console or others). Loggers are created by instantiating the respective class. Metrics and loggers interact via the **Evaluation Plugin**: this is the main object responsible of tracking the experiment progress. Metrics and loggers are directly passed to the `EvaluationPlugin` instance. You will see the output of the loggers automatically during training and evaluation! 
Let's see how to put this together in few lines of code: @@ -299,7 +299,7 @@ eval_plugin = EvaluationPlugin( loss_metrics(minibatch=True, stream=True), # catastrophic forgetting after each evaluation # experience - forgetting_metrics(experience=True, stream=True), + forgetting_metrics(experience=True, stream=True), # add as many metrics as you like loggers=[InteractiveLogger(), TensorboardLogger()]) @@ -338,7 +338,7 @@ model = SimpleMLP(num_classes=benchmark.n_classes) # DEFINE THE EVALUATION PLUGIN and LOGGERS # The evaluation plugin manages the metrics computation. -# It takes as argument a list of metrics, collectes their results and returns +# It takes as argument a list of metrics, collects their results and returns # them to the strategy it is attached to. # log to Tensorboard diff --git a/notebooks/from-zero-to-hero-tutorial/03_benchmarks.ipynb b/notebooks/from-zero-to-hero-tutorial/03_benchmarks.ipynb index 863554d9d..ce91d36f4 100644 --- a/notebooks/from-zero-to-hero-tutorial/03_benchmarks.ipynb +++ b/notebooks/from-zero-to-hero-tutorial/03_benchmarks.ipynb @@ -85,7 +85,7 @@ "# avalanche.benchmarks.utils.set_dataset_root(new_location)\n", "datadir = default_dataset_location('mnist')\n", "\n", - "# As we would simply do with any Pytorch dataset we can create the train and \n", + "# As we would simply do with any Pytorch dataset we can create the train and\n", "# test sets from it. We could use any of the above imported Datasets, but let's\n", "# just try to use the standard MNIST.\n", "train_MNIST = MNIST(datadir, train=True, download=True)\n", @@ -104,14 +104,14 @@ "train_MNIST = as_classification_dataset(\n", " train_MNIST,\n", " transform_groups={\n", - " 'train': train_transforms, \n", + " 'train': train_transforms,\n", " 'eval': eval_transforms\n", " }\n", ")\n", "test_MNIST = as_classification_dataset(\n", " test_MNIST,\n", " transform_groups={\n", - " 'train': train_transforms, \n", + " 'train': train_transforms,\n", " 'eval': eval_transforms\n", " }\n", ")\n", @@ -213,10 +213,12 @@ "data: 12080 samples\n", "EID=4, classes=[4, 7], tasks=[4]\n", "data: 12107 samples\n", - "EID=0, classes=[5, 6], task=[4]\n", - "EID=1, classes=[1, 2], task=[4]\n", - "EID=2, classes=[0, 8], task=[4]\n", - "EID=3, classes=[9, 3], task=[4]\n", + "\n", + "--- Stream: test\n", + "EID=0, classes=[5, 6], task=[0]\n", + "EID=1, classes=[1, 2], task=[1]\n", + "EID=2, classes=[0, 8], task=[2]\n", + "EID=3, classes=[9, 3], task=[3]\n", "EID=4, classes=[4, 7], task=[4]\n" ] } @@ -246,8 +248,10 @@ " # the experience provides a dataset\n", " print(f\"data: {len(exp.dataset)} samples\")\n", "\n", + "print()\n", + "print(f'--- Stream: {bm.test_stream.name}')\n", "for exp in bm.test_stream:\n", - " print(f\"EID={exp.current_experience}, classes={exp.classes_in_this_experience}, task={tls}\")\n" + " print(f\"EID={exp.current_experience}, classes={exp.classes_in_this_experience}, task={exp.task_labels}\")\n" ] }, { @@ -12558,7 +12562,7 @@ " print(f\"\\tsize: {len(exp.dataset)}\")\n", "\n", " # in a training loop, here you would train on the online_train_stream\n", - " # here you would test on bm.valid_stream or bm.test_stream " + " # here you would test on bm.valid_stream or bm.test_stream" ] }, { diff --git a/notebooks/from-zero-to-hero-tutorial/05_evaluation.ipynb b/notebooks/from-zero-to-hero-tutorial/05_evaluation.ipynb index 35ed01a1e..d6cdfdb0a 100644 --- a/notebooks/from-zero-to-hero-tutorial/05_evaluation.ipynb +++ b/notebooks/from-zero-to-hero-tutorial/05_evaluation.ipynb @@ -566,7 +566,7 @@ "\n", "# 
DEFINE THE EVALUATION PLUGIN\n", "# The evaluation plugin manages the metrics computation.\n", - "# It takes as argument a list of metrics, collectes their results and returns\n", + "# It takes as argument a list of metrics, collects their results and returns\n", "# them to the strategy it is attached to.\n", "\n", "eval_plugin = EvaluationPlugin(\n", diff --git a/notebooks/from-zero-to-hero-tutorial/06_loggers.ipynb b/notebooks/from-zero-to-hero-tutorial/06_loggers.ipynb index 61e43fefa..7ac80ec76 100644 --- a/notebooks/from-zero-to-hero-tutorial/06_loggers.ipynb +++ b/notebooks/from-zero-to-hero-tutorial/06_loggers.ipynb @@ -86,7 +86,7 @@ "\n", "# DEFINE THE EVALUATION PLUGIN and LOGGERS\n", "# The evaluation plugin manages the metrics computation.\n", - "# It takes as argument a list of metrics, collectes their results and returns\n", + "# It takes as argument a list of metrics, collects their results and returns\n", "# them to the strategy it is attached to.\n", "\n", "\n", diff --git a/notebooks/from-zero-to-hero-tutorial/07_putting-all-together.ipynb b/notebooks/from-zero-to-hero-tutorial/07_putting-all-together.ipynb index 59a9492f1..97a42faec 100644 --- a/notebooks/from-zero-to-hero-tutorial/07_putting-all-together.ipynb +++ b/notebooks/from-zero-to-hero-tutorial/07_putting-all-together.ipynb @@ -429,7 +429,7 @@ "\n", "# DEFINE THE EVALUATION PLUGIN and LOGGERS\n", "# The evaluation plugin manages the metrics computation.\n", - "# It takes as argument a list of metrics, collectes their results and returns\n", + "# It takes as argument a list of metrics, collects their results and returns\n", "# them to the strategy it is attached to.\n", "\n", "# log to Tensorboard\n", diff --git a/notebooks/getting-started/learn-avalanche-in-5-minutes.ipynb b/notebooks/getting-started/learn-avalanche-in-5-minutes.ipynb index 41238bb3a..aa4d655a7 100644 --- a/notebooks/getting-started/learn-avalanche-in-5-minutes.ipynb +++ b/notebooks/getting-started/learn-avalanche-in-5-minutes.ipynb @@ -836,7 +836,7 @@ "\n", "# DEFINE THE EVALUATION PLUGIN and LOGGERS\n", "# The evaluation plugin manages the metrics computation.\n", - "# It takes as argument a list of metrics, collectes their results and returns \n", + "# It takes as argument a list of metrics, collects their results and returns \n", "# them to the strategy it is attached to.\n", "\n", "# log to Tensorboard\n", From d1f171c0b011c27f964cd6eaf6cfd1b395971f3e Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Tue, 21 May 2024 21:16:02 +0200 Subject: [PATCH 06/30] export MNIST default transforms for composing to them --- avalanche/benchmarks/classic/cmnist.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/avalanche/benchmarks/classic/cmnist.py b/avalanche/benchmarks/classic/cmnist.py index db6ef1297..107ad84b9 100644 --- a/avalanche/benchmarks/classic/cmnist.py +++ b/avalanche/benchmarks/classic/cmnist.py @@ -33,9 +33,8 @@ ) from avalanche.benchmarks.utils.data import make_avalanche_dataset -_default_mnist_train_transform = Compose([Normalize((0.1307,), (0.3081,))]) - -_default_mnist_eval_transform = Compose([Normalize((0.1307,), (0.3081,))]) +default_mnist_train_transform = Compose([Normalize((0.1307,), (0.3081,))]) +default_mnist_eval_transform = Compose([Normalize((0.1307,), (0.3081,))]) class PixelsPermutation(object): @@ -83,8 +82,8 @@ def SplitMNIST( shuffle: bool = True, class_ids_from_zero_in_each_exp: bool = False, class_ids_from_zero_from_first_exp: bool = False, - train_transform: 
Optional[Any] = _default_mnist_train_transform, - eval_transform: Optional[Any] = _default_mnist_eval_transform, + train_transform: Optional[Any] = default_mnist_train_transform, + eval_transform: Optional[Any] = default_mnist_eval_transform, dataset_root: Optional[Union[str, Path]] = None ): """ @@ -170,8 +169,8 @@ def PermutedMNIST( *, return_task_id=False, seed: Optional[int] = None, - train_transform: Optional[Any] = _default_mnist_train_transform, - eval_transform: Optional[Any] = _default_mnist_eval_transform, + train_transform: Optional[Any] = default_mnist_train_transform, + eval_transform: Optional[Any] = default_mnist_eval_transform, dataset_root: Optional[Union[str, Path]] = None ) -> NCScenario: """ @@ -268,8 +267,8 @@ def RotatedMNIST( return_task_id: bool = False, seed: Optional[int] = None, rotations_list: Optional[Sequence[int]] = None, - train_transform: Optional[Any] = _default_mnist_train_transform, - eval_transform: Optional[Any] = _default_mnist_eval_transform, + train_transform: Optional[Any] = default_mnist_train_transform, + eval_transform: Optional[Any] = default_mnist_eval_transform, dataset_root: Optional[Union[str, Path]] = None ) -> NCScenario: """Creates a Rotated MNIST benchmark. @@ -379,7 +378,13 @@ def RotatedMNIST( ) -__all__ = ["SplitMNIST", "PermutedMNIST", "RotatedMNIST"] +__all__ = [ + "SplitMNIST", + "PermutedMNIST", + "RotatedMNIST", + "default_mnist_train_transform", + "default_mnist_eval_transform", +] if __name__ == "__main__": From a5bb5bcc61f09708e46fc75e44af98ef262f0c78 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Sat, 25 May 2024 23:45:23 +0200 Subject: [PATCH 07/30] typos --- avalanche/benchmarks/scenarios/dataset_scenario.py | 2 +- avalanche/benchmarks/scenarios/generic_scenario.py | 2 +- avalanche/models/dynamic_modules.py | 4 ++-- examples/multihead.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/avalanche/benchmarks/scenarios/dataset_scenario.py b/avalanche/benchmarks/scenarios/dataset_scenario.py index d00f2ac04..d716d36a1 100644 --- a/avalanche/benchmarks/scenarios/dataset_scenario.py +++ b/avalanche/benchmarks/scenarios/dataset_scenario.py @@ -119,7 +119,7 @@ def split_validation_random( a single parameter: the experience. Consider wrapping your custom splitting strategy with `partial` if more parameters are needed. - You can use this split strategy with methdos that require a custom + You can use this split strategy with methods that require a custom split strategy such as :func:`benchmark_with_validation_stream`to split a benchmark with:: diff --git a/avalanche/benchmarks/scenarios/generic_scenario.py b/avalanche/benchmarks/scenarios/generic_scenario.py index 34da0d249..3dabc3012 100644 --- a/avalanche/benchmarks/scenarios/generic_scenario.py +++ b/avalanche/benchmarks/scenarios/generic_scenario.py @@ -603,7 +603,7 @@ def __init__(self, streams: Iterable[TCLStream]): @property def streams(self): - # we don't want in-place modifications so we return a copy + # we don't want in-place modifications, so we return a copy return copy(self._streams) diff --git a/avalanche/models/dynamic_modules.py b/avalanche/models/dynamic_modules.py index 436c75bee..8919fd1f3 100644 --- a/avalanche/models/dynamic_modules.py +++ b/avalanche/models/dynamic_modules.py @@ -120,7 +120,7 @@ class MultiTaskModule(DynamicModule): scenarios. The ``forward`` method accepts task labels, one for each sample in the mini-batch. 
-    By default the ``forward`` method splits the mini-batch by task
+    By default, the ``forward`` method splits the mini-batch by task
     and calls ``forward_single_task``. Subclasses must implement
     ``forward_single_task`` or override `forward. If ``task_labels == None``,
     the output is computed in parallel for each task.
@@ -342,7 +342,7 @@ def __init__(
 
         # needs to create the first head because pytorch optimizers
         # fail when model.parameters() is empty.
-        # masking in IncrementalClassifier is unaware of task labels
+        # masking in IncrementalClassifier is unaware of task labels,
         # so we do masking here instead.
         first_head = IncrementalClassifier(
             self.in_features,
diff --git a/examples/multihead.py b/examples/multihead.py
index c0177418e..3cd76039a 100644
--- a/examples/multihead.py
+++ b/examples/multihead.py
@@ -10,7 +10,7 @@
 ################################################################################
 
 """
-This example trains a Multi-head model on Split MNIST with Elastich Weight
+This example trains a Multi-head model on Split MNIST with Elastic Weight
 Consolidation. Each experience has a different task label, which is used at
 test time to select the appropriate head.
 """

From f1a4d0f4772f84779a5706e415d95ee4d6a12a78 Mon Sep 17 00:00:00 2001
From: Jim Neuendorf
Date: Sat, 25 May 2024 23:46:27 +0200
Subject: [PATCH 08/30] [docs] replace dataset with experience for dynamic modules

---
 avalanche/models/dynamic_modules.py                  | 2 +-
 notebooks/from-zero-to-hero-tutorial/02_models.ipynb | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/avalanche/models/dynamic_modules.py b/avalanche/models/dynamic_modules.py
index 8919fd1f3..d842ecd75 100644
--- a/avalanche/models/dynamic_modules.py
+++ b/avalanche/models/dynamic_modules.py
@@ -142,7 +142,7 @@ def adaptation(self, experience: CLExperience):
 
         .. warning::
             As a general rule, you should NOT use this method to train the
-            model. The dataset should be used only to check conditions which
+            model. The experience should be used only to check conditions which
            require the model's adaptation, such as the discovery of new
             classes or tasks.
diff --git a/notebooks/from-zero-to-hero-tutorial/02_models.ipynb b/notebooks/from-zero-to-hero-tutorial/02_models.ipynb index c7667f334..d92b84a4f 100644 --- a/notebooks/from-zero-to-hero-tutorial/02_models.ipynb +++ b/notebooks/from-zero-to-hero-tutorial/02_models.ipynb @@ -180,8 +180,8 @@ " def __init__(self, in_features, initial_out_features=2):\n", " super().__init__()\n", "\n", - " def adaptation(self, dataset):\n", - " super().adaptation(dataset)\n", + " def adaptation(self, experience):\n", + " super().adaptation(experience)\n", " # your adaptation goes here\n", "\n", " def forward_single_task(self, x, task_label):\n", From d7ad34b31c91aa9bb7993a65972e85ae6eee3d8e Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Sat, 25 May 2024 23:47:32 +0200 Subject: [PATCH 09/30] defaults for _init_transform_groups (implementation covers these cases) --- avalanche/benchmarks/utils/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/avalanche/benchmarks/utils/utils.py b/avalanche/benchmarks/utils/utils.py index e11fa1544..1f206f028 100644 --- a/avalanche/benchmarks/utils/utils.py +++ b/avalanche/benchmarks/utils/utils.py @@ -454,11 +454,11 @@ def _select_task_labels( def _init_transform_groups( - transform_groups: Optional[Mapping[str, TransformGroupDef]], - transform: Optional[XTransform], - target_transform: Optional[YTransform], - initial_transform_group: Optional[str], - dataset, + transform_groups: Optional[Mapping[str, TransformGroupDef]] = None, + transform: Optional[XTransform] = None, + target_transform: Optional[YTransform] = None, + initial_transform_group: Optional[str] = None, + dataset: Optional[Any] = None, ) -> Optional[TransformGroups]: """ Initializes the transform groups for the given dataset. From f7d1b4e4e60ae27c0fbf4aae5a25846d45dd730a Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Mon, 27 May 2024 23:16:10 +0200 Subject: [PATCH 10/30] typos --- avalanche/core.py | 6 +++--- avalanche/models/dynamic_optimizers.py | 8 ++++---- .../templates/observation_type/batch_observation.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/avalanche/core.py b/avalanche/core.py index 002f818cb..989228c45 100644 --- a/avalanche/core.py +++ b/avalanche/core.py @@ -19,7 +19,7 @@ class BasePlugin(Generic[Template], ABC): `StrategyCallbacks` provide two functions `before_{method}` and `after_{method}`, called before and after the method, respectively. - Therefore plugins can "inject" additional code by implementing callbacks. + Therefore, plugins can "inject" additional code by implementing callbacks. Each callback has a `strategy` argument that gives access to the state. In Avalanche, callbacks are used to implement continual strategies, metrics @@ -33,7 +33,7 @@ class BasePlugin(Generic[Template], ABC): def __init__(self): """ - Inizializes an instance of a supervised plugin. + Initializes an instance of a supervised plugin. """ super().__init__() @@ -82,7 +82,7 @@ class BaseSGDPlugin(BasePlugin[Template], ABC): def __init__(self): """ - Inizializes an instance of a base SGD plugin. + Initializes an instance of a base SGD plugin. 
""" super().__init__() diff --git a/avalanche/models/dynamic_optimizers.py b/avalanche/models/dynamic_optimizers.py index e48241ee9..073661f15 100644 --- a/avalanche/models/dynamic_optimizers.py +++ b/avalanche/models/dynamic_optimizers.py @@ -55,7 +55,7 @@ def reset_optimizer(optimizer, model): def update_optimizer(optimizer, new_params, optimized_params, reset_state=False): """Update the optimizer by adding new parameters, - removing removed parameters, and adding new parameters + removing obsolete parameters, and adding new parameters to the optimizer, for instance after model has been adapted to a new task. The state of the optimizer can also be reset, it will be reset for the modified parameters. @@ -65,13 +65,13 @@ def update_optimizer(optimizer, new_params, optimized_params, reset_state=False) :param new_params: Dict (name, param) of new parameters :param optimized_params: Dict (name, param) of currently optimized parameters (returned by reset_optimizer) - :param reset_state: Wheter to reset the optimizer's state (i.e momentum). + :param reset_state: Whether to reset the optimizer's state (i.e. momentum). Defaults to False. :return: Dict (name, param) of optimized parameters """ not_in_new, in_both, not_in_old = compare_keys(optimized_params, new_params) # Change reference to already existing parameters - # i.e growing IncrementalClassifier + # i.e. growing IncrementalClassifier for key in in_both: old_p_hash = optimized_params[key] new_p = new_params[key] @@ -92,7 +92,7 @@ def update_optimizer(optimizer, new_params, optimized_params, reset_state=False) ) # Remove parameters that are not here anymore - # This should not happend in most use case + # This should not happen in most use case keys_to_remove = [] for key in not_in_new: old_p_hash = optimized_params[key] diff --git a/avalanche/training/templates/observation_type/batch_observation.py b/avalanche/training/templates/observation_type/batch_observation.py index 374d71c45..cdf30d0a5 100644 --- a/avalanche/training/templates/observation_type/batch_observation.py +++ b/avalanche/training/templates/observation_type/batch_observation.py @@ -48,7 +48,7 @@ def make_optimizer(self, reset_optimizer_state=False, **kwargs): - The first time this function is called for a given strategy it will reset the optimizer to gather the (name, param) - correspondance of the optimized parameters + correspondence of the optimized parameters all the model parameters will be put in the optimizer, regardless of what parameters are initially put in the optimizer. From 6916ef0fa7e23ae8b526072a7ac6037c9a8c3456 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Thu, 30 May 2024 11:43:27 +0200 Subject: [PATCH 11/30] typo --- avalanche/training/templates/base_sgd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/avalanche/training/templates/base_sgd.py b/avalanche/training/templates/base_sgd.py index e54f402a6..01291f26d 100644 --- a/avalanche/training/templates/base_sgd.py +++ b/avalanche/training/templates/base_sgd.py @@ -387,7 +387,7 @@ def _obtain_common_dataloader_parameters(self, **kwargs): implementation (super) to obtain a base dictionary of parameters. However, if a more deep change is needed in the data loading procedure, - it is better to overrride :meth:`make_train_dataloader` and/or + it is better to override :meth:`make_train_dataloader` and/or :meth:`make_eval_dataloader` directly. 
Note: the resulting dictionary does not include the collate function From b062b96e50c3f05ed25040ff52df9acf2067bde0 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Thu, 30 May 2024 11:44:35 +0200 Subject: [PATCH 12/30] split_validation_random: no default value for required 'dataset' argument --- avalanche/benchmarks/scenarios/dataset_scenario.py | 5 ++--- avalanche/benchmarks/scenarios/validation_scenario.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/avalanche/benchmarks/scenarios/dataset_scenario.py b/avalanche/benchmarks/scenarios/dataset_scenario.py index d716d36a1..c2d53af06 100644 --- a/avalanche/benchmarks/scenarios/dataset_scenario.py +++ b/avalanche/benchmarks/scenarios/dataset_scenario.py @@ -101,8 +101,8 @@ def _split_dataset_by_attribute( def split_validation_random( validation_size: Union[int, float], shuffle: bool, + dataset: AvalancheDataset, seed: Optional[int] = None, - dataset: Optional[AvalancheDataset] = None, ) -> Tuple[AvalancheDataset, AvalancheDataset]: """Splits an `AvalancheDataset` in two splits. @@ -133,11 +133,10 @@ def split_validation_random( Otherwise, the first instances will be allocated to the training dataset by leaving the last ones to the validation dataset. :param dataset: The dataset to split. + :param seed: The random seed for shuffling the dataset. :return: A tuple containing 2 elements: the new training and validation datasets. """ - if dataset is None: - raise ValueError("dataset must be provided") exp_indices = list(range(len(dataset))) if seed is None: diff --git a/avalanche/benchmarks/scenarios/validation_scenario.py b/avalanche/benchmarks/scenarios/validation_scenario.py index 74d3bb2ad..5a3eb46ae 100644 --- a/avalanche/benchmarks/scenarios/validation_scenario.py +++ b/avalanche/benchmarks/scenarios/validation_scenario.py @@ -70,7 +70,7 @@ def benchmark_with_validation_stream( # functools.partial is a more compact option # However, MyPy does not understand what a partial is -_- def random_validation_split_strategy_wrapper(data): - return split_validation_random(validation_size, shuffle, seed, data) + return split_validation_random(validation_size, shuffle, data, seed) split_strategy = random_validation_split_strategy_wrapper else: From a6fcb39601cca652c18b23d717a3421a72530b37 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Thu, 30 May 2024 23:47:03 +0200 Subject: [PATCH 13/30] typo --- avalanche/benchmarks/scenarios/supervised.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/avalanche/benchmarks/scenarios/supervised.py b/avalanche/benchmarks/scenarios/supervised.py index c9a5f36ce..ab980ed4a 100644 --- a/avalanche/benchmarks/scenarios/supervised.py +++ b/avalanche/benchmarks/scenarios/supervised.py @@ -103,14 +103,14 @@ def class_incremental_benchmark( classes_exp_assignment = [] if num_experiences is not None: assert num_classes_per_exp is None, "BUG: num_classes_per_exp must be None" - curr_classess_per_exp: int = num_classes // num_experiences + curr_classes_per_exp: int = num_classes // num_experiences for eid in range(num_experiences): if eid == 0: - classes_exp_assignment.append(class_order[:curr_classess_per_exp]) + classes_exp_assignment.append(class_order[:curr_classes_per_exp]) else: # final exp will take reminder of classes if they don't divide equally - start_idx = curr_classess_per_exp * eid - end_idx = start_idx + curr_classess_per_exp + start_idx = curr_classes_per_exp * eid + end_idx = start_idx + curr_classes_per_exp 
classes_exp_assignment.append(class_order[start_idx:end_idx]) elif num_classes_per_exp is not None: num_curr = 0 From 68cd80bec07de7d36644678c70e6c8a63affc52c Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Thu, 30 May 2024 23:48:32 +0200 Subject: [PATCH 14/30] [typing] use Self instead TypeVar, organize imports --- avalanche/benchmarks/utils/data.py | 49 ++++++++++++------------------ 1 file changed, 20 insertions(+), 29 deletions(-) diff --git a/avalanche/benchmarks/utils/data.py b/avalanche/benchmarks/utils/data.py index 9a20ed3da..757cf4fd9 100644 --- a/avalanche/benchmarks/utils/data.py +++ b/avalanche/benchmarks/utils/data.py @@ -17,13 +17,7 @@ """ import copy import warnings -import numpy as np - -from torch.utils.data.dataloader import default_collate - -from avalanche.benchmarks.utils.dataset_definitions import IDataset -from .data_attribute import DataAttribute - +from collections import OrderedDict from typing import ( Dict, List, @@ -36,14 +30,17 @@ overload, ) -from .flat_data import FlatData -from .transform_groups import TransformGroups, EmptyTransformGroups +import numpy as np from torch.utils.data import Dataset as TorchDataset -from collections import OrderedDict +from torch.utils.data.dataloader import default_collate +from typing_extensions import Self +from avalanche.benchmarks.utils.dataset_definitions import IDataset +from .data_attribute import DataAttribute +from .flat_data import FlatData +from .transform_groups import TransformGroups, EmptyTransformGroups T_co = TypeVar("T_co", covariant=True) -TAvalancheDataset = TypeVar("TAvalancheDataset", bound="AvalancheDataset") TDataWTransform = TypeVar("TDataWTransform", bound="_FlatDataWithTransform") @@ -249,12 +246,10 @@ def __init__( def __len__(self) -> int: return len(self._flat_data) - def __add__(self: TAvalancheDataset, other: TAvalancheDataset) -> TAvalancheDataset: + def __add__(self, other: Self) -> Self: return self.concat(other) - def __radd__( - self: TAvalancheDataset, other: TAvalancheDataset - ) -> TAvalancheDataset: + def __radd__(self, other: Self) -> Self: return other.concat(self) @property @@ -262,7 +257,7 @@ def _datasets(self): """Only for backward compatibility of old unit tests. Do not use.""" return self._flat_data._datasets - def concat(self: TAvalancheDataset, other: TAvalancheDataset) -> TAvalancheDataset: + def concat(self, other: Self) -> Self: """Concatenate this dataset with other. :param other: Other dataset to concatenate. @@ -270,7 +265,7 @@ def concat(self: TAvalancheDataset, other: TAvalancheDataset) -> TAvalancheDatas """ return self.__class__([self, other]) - def subset(self: TAvalancheDataset, indices: Sequence[int]) -> TAvalancheDataset: + def subset(self, indices: Sequence[int]) -> Self: """Subset this dataset. :param indices: The indices to keep. @@ -286,14 +281,12 @@ def transform(self): "See the documentation for more info." ) - def update_data_attribute( - self: TAvalancheDataset, name: str, new_value - ) -> TAvalancheDataset: + def update_data_attribute(self, name: str, new_value) -> Self: """ Return a new dataset with the added or replaced data attribute. - If a object of type :class:`DataAttribute` is passed, then the data - attribute is setted as is. + If an object of type :class:`DataAttribute` is passed, then the data + attribute is set as is. Otherwise, if a raw value is passed, a new DataAttribute is created. 
If a DataAttribute with the same already exists, the use_in_getitem @@ -345,11 +338,9 @@ def __eq__(self, other: object): def __getitem__(self, exp_id: int) -> T_co: ... @overload - def __getitem__(self: TAvalancheDataset, exp_id: slice) -> TAvalancheDataset: ... + def __getitem__(self, exp_id: slice) -> Self: ... # type: ignore - def __getitem__( - self: TAvalancheDataset, idx: Union[int, slice] - ) -> Union[T_co, TAvalancheDataset]: + def __getitem__(self, idx: Union[int, slice]) -> Union[T_co, Self]: elem = self._flat_data[idx] for da in self._data_attributes.values(): if da.use_in_getitem: @@ -387,7 +378,7 @@ def eval(self): """ return self.with_transforms("eval") - def with_transforms(self: TAvalancheDataset, group_name: str) -> TAvalancheDataset: + def with_transforms(self, group_name: str) -> Self: """ Returns a new dataset with the transformations of a different group loaded. @@ -401,7 +392,7 @@ def with_transforms(self: TAvalancheDataset, group_name: str) -> TAvalancheDatas datacopy._flat_data = datacopy._flat_data.with_transforms(group_name) return datacopy - def freeze_transforms(self: TAvalancheDataset) -> TAvalancheDataset: + def freeze_transforms(self) -> Self: """Returns a new dataset with the transformation groups frozen.""" datacopy = self._shallow_clone_dataset() datacopy._flat_data = datacopy._flat_data.freeze_transforms() @@ -422,7 +413,7 @@ def replace_current_transform_group(self, transform): datacopy._flat_data = fdata.replace_current_transform_group(transform) return datacopy - def _shallow_clone_dataset(self: TAvalancheDataset) -> TAvalancheDataset: + def _shallow_clone_dataset(self) -> Self: """Clone dataset. This is a shallow copy, i.e. the data attributes are not copied. """ From b94bed2dcce96b3d0e6c975898a1140b814b8088 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Fri, 31 May 2024 10:28:12 +0200 Subject: [PATCH 15/30] [benchmark] typing --- avalanche/benchmarks/scenarios/generic_scenario.py | 10 ++++++++-- avalanche/benchmarks/scenarios/supervised.py | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/avalanche/benchmarks/scenarios/generic_scenario.py b/avalanche/benchmarks/scenarios/generic_scenario.py index 3dabc3012..ae1889893 100644 --- a/avalanche/benchmarks/scenarios/generic_scenario.py +++ b/avalanche/benchmarks/scenarios/generic_scenario.py @@ -36,7 +36,6 @@ slice_alike_object_to_indices, ) - # Typing T = TypeVar("T") TCov = TypeVar("TCov", covariant=True) @@ -590,6 +589,13 @@ class CLScenario(Generic[TCLStream]): provide access to past, current, and future data. """ + # Define usual empty streams for typing + # TODO: If regarded unnecessary, the constructor magic should be removed + # and `scenario.streams['train']` yields the correct type + train_stream = CLStream('train', []) + test_stream = CLStream('test', []) + valid_stream = CLStream('valid', []) + def __init__(self, streams: Iterable[TCLStream]): """Creates an instance of a Continual Learning benchmark. @@ -612,7 +618,7 @@ def make_stream(name: str, exps: Iterable[CLExperience]) -> CLStream: Uses the correct class for generators, sized generators, and lists. - :param new_name: The name of the new stream. + :param name: The name of the new stream. :param exps: sequence of experiences. 
""" s_wrapped: CLStream diff --git a/avalanche/benchmarks/scenarios/supervised.py b/avalanche/benchmarks/scenarios/supervised.py index ab980ed4a..ac9c7d115 100644 --- a/avalanche/benchmarks/scenarios/supervised.py +++ b/avalanche/benchmarks/scenarios/supervised.py @@ -40,7 +40,7 @@ def class_incremental_benchmark( num_experiences: Optional[int] = None, num_classes_per_exp: Optional[Sequence[int]] = None, seed: Optional[int] = None, -) -> CLScenario: +) -> CLScenario[EagerCLStream[DatasetExperience]]: """Splits datasets according to a class-incremental scenario. Each dataset will create a stream with the same class order. From 3021e90c732e53ebd5563cd5ed61ac10ce58fd8f Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Fri, 31 May 2024 10:28:25 +0200 Subject: [PATCH 16/30] [benchmark] typo --- avalanche/benchmarks/utils/detection_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/avalanche/benchmarks/utils/detection_dataset.py b/avalanche/benchmarks/utils/detection_dataset.py index ad897bbdf..e6d63468b 100644 --- a/avalanche/benchmarks/utils/detection_dataset.py +++ b/avalanche/benchmarks/utils/detection_dataset.py @@ -199,10 +199,10 @@ def make_detection_dataset( this dataset, but it can also be used in a completely standalone manner. This dataset applies input/target transformations, it supports - slicing and advanced indexing and it also contains useful fields as + slicing, advanced indexing, and it also contains useful fields as `targets`, which contains the pattern dictionaries, and `targets_task_labels`, which contains the pattern task labels. - The `task_set` field can be used to obtain a the subset of patterns + The `task_set` field can be used to obtain a subset of patterns labeled with a given task label. This dataset can also be used to apply several advanced operations involving From 8c819b7397bad8efe1ecfb0c0251cb49080954bd Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Fri, 31 May 2024 21:29:29 +0200 Subject: [PATCH 17/30] [benchmark] enhance split_validation_random typing --- avalanche/benchmarks/scenarios/dataset_scenario.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/avalanche/benchmarks/scenarios/dataset_scenario.py b/avalanche/benchmarks/scenarios/dataset_scenario.py index c2d53af06..8c5955ba4 100644 --- a/avalanche/benchmarks/scenarios/dataset_scenario.py +++ b/avalanche/benchmarks/scenarios/dataset_scenario.py @@ -101,9 +101,9 @@ def _split_dataset_by_attribute( def split_validation_random( validation_size: Union[int, float], shuffle: bool, - dataset: AvalancheDataset, + dataset: TCLDataset, seed: Optional[int] = None, -) -> Tuple[AvalancheDataset, AvalancheDataset]: +) -> Tuple[TCLDataset, TCLDataset]: """Splits an `AvalancheDataset` in two splits. The default splitting strategy used by From 393ba88dbb04425705250099e61a7e3487fffd69 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Sat, 1 Jun 2024 10:36:32 +0200 Subject: [PATCH 18/30] [models] fix incorrectly merged optimizer --- avalanche/models/dynamic_optimizers.py | 37 +++++++++----------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/avalanche/models/dynamic_optimizers.py b/avalanche/models/dynamic_optimizers.py index 5bbf8bb38..98ed157f3 100644 --- a/avalanche/models/dynamic_optimizers.py +++ b/avalanche/models/dynamic_optimizers.py @@ -370,9 +370,10 @@ def update_optimizer( # Change reference to already existing parameters # i.e. 
growing IncrementalClassifier - for key in in_both: - old_p_hash = optimized_params[key] - new_p = new_params[key] + for name, group_idx, param_idx in changed_parameters: + group = optimizer.param_groups[group_idx] + old_p = optimized_params[name] + new_p = new_params[name] # Look for old parameter id in current optimizer group["params"][param_idx] = new_p if old_p in optimizer.state: @@ -381,28 +382,14 @@ def update_optimizer( # Remove parameters that are not here anymore # This should not happen in most use cases - keys_to_remove = [] - for key in not_in_new: - old_p_hash = optimized_params[key] - found = False - for i, group in enumerate(optimizer.param_groups): - keys_to_remove.append([]) - for j, curr_p in enumerate(group["params"]): - if id(curr_p) == id(old_p_hash): - found = True - keys_to_remove[i].append((j, curr_p)) - optimized_params.pop(key) - break - if not found: - raise Exception( - f"Parameter {key} expected but " "not found in the optimizer" - ) - - for i, idx_list in enumerate(keys_to_remove): - for j, p in sorted(idx_list, key=lambda x: x[0], reverse=True): - del optimizer.param_groups[i]["params"][j] - if p in optimizer.state: - optimizer.state.pop(p) + if remove_params: + for group_idx, idx_list in enumerate(not_found_in_parameters): + for j in sorted(idx_list, key=lambda x: x, reverse=True): + p = optimizer.param_groups[group_idx]["params"][j] + optimizer.param_groups[group_idx]["params"].pop(j) + if p in optimizer.state: + optimizer.state.pop(p) + del p # Add newly added parameters (i.e Multitask, PNN) From 13a33f5c9408e5cefade2c4bfa1910ac42e593ff Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Sun, 2 Jun 2024 20:40:45 +0200 Subject: [PATCH 19/30] [training] fix imports/exports, typo --- avalanche/training/losses.py | 3 +-- avalanche/training/regularization.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/avalanche/training/losses.py b/avalanche/training/losses.py index 460b01aa2..699ccd1d6 100644 --- a/avalanche/training/losses.py +++ b/avalanche/training/losses.py @@ -3,7 +3,6 @@ import numpy as np import torch import torch.nn.functional as F -from torch import nn from torch.nn import BCELoss from avalanche.training.plugins import SupervisedPlugin @@ -170,7 +169,7 @@ class MaskedCrossEntropy(SupervisedPlugin): Masked Cross Entropy This criterion can be used for instance in Class Incremental - Learning Problems when no examplars are used + Learning Problems when no exemplars are used (i.e LwF in Class Incremental Learning would need to use mask="new"). 
""" diff --git a/avalanche/training/regularization.py b/avalanche/training/regularization.py index dc4ab310f..0c660bbfb 100644 --- a/avalanche/training/regularization.py +++ b/avalanche/training/regularization.py @@ -353,4 +353,5 @@ def __call__( "LearningWithoutForgetting", "ACECriterion", "AMLCriterion", + "cross_entropy_with_oh_targets", ] From ebc2aef32ee25aa9ba7bfd6535cf4ce1941ff8bc Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Sun, 2 Jun 2024 21:41:32 +0200 Subject: [PATCH 20/30] [notebooks] fix Avalanche dataset contructor --- notebooks/how-tos/avalanchedataset/avalanche-datasets.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/how-tos/avalanchedataset/avalanche-datasets.ipynb b/notebooks/how-tos/avalanchedataset/avalanche-datasets.ipynb index d6aa4a99c..7d585c167 100644 --- a/notebooks/how-tos/avalanchedataset/avalanche-datasets.ipynb +++ b/notebooks/how-tos/avalanchedataset/avalanche-datasets.ipynb @@ -75,7 +75,7 @@ "# Create the Dataset\n", "torch_data = TensorDataset(x_data, y_data)\n", "\n", - "avl_data = AvalancheDataset(torch_data)" + "avl_data = AvalancheDataset([torch_data])" ] }, { From 38173c183f075727f540d4b25045c8a1bdb4c69c Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Mon, 3 Jun 2024 11:09:35 +0200 Subject: [PATCH 21/30] [benchmarks] prefer make_avalanche_dataset over constructor for single dataset --- avalanche/benchmarks/utils/data.py | 11 ++++++++++- avalanche/training/supervised/joint_training.py | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/avalanche/benchmarks/utils/data.py b/avalanche/benchmarks/utils/data.py index 757cf4fd9..e2861e264 100644 --- a/avalanche/benchmarks/utils/data.py +++ b/avalanche/benchmarks/utils/data.py @@ -85,7 +85,11 @@ class AvalancheDataset(IDataset[T_co]): def __init__( self, - datasets: Sequence[IDataset[T_co]], + datasets: Union[ + Sequence[IDataset[T_co]], + TorchDataset[T_co], + "AvalancheDataset[T_co]", + ], *, indices: Optional[List[int]] = None, data_attributes: Optional[List[DataAttribute]] = None, @@ -100,7 +104,12 @@ def __init__( applied by this dataset. :param transform_groups: Avalanche transform groups. """ + # TODO: Deprecate in favor of `make_avalanche_dataset`? if isinstance(datasets, (TorchDataset, AvalancheDataset)): + warnings.warn( + f"'datasets' argument should be a list of datasets, " + f"not {type(datasets).__name__}" + ) datasets = [datasets] # type: ignore # NOTES on implementation: diff --git a/avalanche/training/supervised/joint_training.py b/avalanche/training/supervised/joint_training.py index 08294cfc0..e50e6b63a 100644 --- a/avalanche/training/supervised/joint_training.py +++ b/avalanche/training/supervised/joint_training.py @@ -121,7 +121,7 @@ def train( trains on all of them at the same time (a.k.a. offline training). :param experiences: single Experience or sequence. - :param eval_streams: list of streams for evaluation. + :param eval_streams: sequence of streams for evaluation. If None: use training experiences for evaluation. Use [] if you do not want to evaluate during training. 
From 15efc3cda2c745feee0ed5c1e6779f2557e9db1d Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Mon, 3 Jun 2024 22:27:24 +0200 Subject: [PATCH 22/30] [models] dynamic module typos --- avalanche/models/dynamic_modules.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/avalanche/models/dynamic_modules.py b/avalanche/models/dynamic_modules.py index 436c75bee..1cd1f0e0b 100644 --- a/avalanche/models/dynamic_modules.py +++ b/avalanche/models/dynamic_modules.py @@ -60,7 +60,7 @@ class DynamicModule(Module): expanded to allow architectural modifications (multi-head classifiers, progressive networks, ...). - Compared to pytoch Modules, they provide an additional method, + Compared to pytorch Modules, they provide an additional method, `model_adaptation`, which adapts the model given the current experience. """ @@ -68,15 +68,15 @@ def __init__(self, auto_adapt=True): """ :param auto_adapt: If True, will be adapted in the recursive adaptation loop else, will be adapted by a module in charge - (i.e IncrementalClassifier inside MultiHeadClassifier) + (i.e. IncrementalClassifier inside MultiHeadClassifier) """ super().__init__() self._auto_adapt = auto_adapt def recursive_adaptation(self, experience): """ - Calls self.adaptation recursively accross - the hierarchy of pytorch module childrens + Calls self.adaptation recursively across + the hierarchy of pytorch module children """ avalanche_model_adaptation(self, experience) @@ -120,7 +120,7 @@ class MultiTaskModule(DynamicModule): scenarios. The ``forward`` method accepts task labels, one for each sample in the mini-batch. - By default the ``forward`` method splits the mini-batch by task + By default, the ``forward`` method splits the mini-batch by task and calls ``forward_single_task``. Subclasses must implement ``forward_single_task`` or override `forward. If ``task_labels == None``, the output is computed in parallel for each task. From a8995fbd6c20c5a73db4fcc4b3be5a21abf00195 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Thu, 6 Jun 2024 22:44:17 +0200 Subject: [PATCH 23/30] [benchmarks] typos, doc notes --- avalanche/benchmarks/scenarios/generic_scenario.py | 3 ++- avalanche/benchmarks/scenarios/online.py | 1 + avalanche/benchmarks/scenarios/task_aware.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/avalanche/benchmarks/scenarios/generic_scenario.py b/avalanche/benchmarks/scenarios/generic_scenario.py index ae1889893..c6ba25225 100644 --- a/avalanche/benchmarks/scenarios/generic_scenario.py +++ b/avalanche/benchmarks/scenarios/generic_scenario.py @@ -274,6 +274,7 @@ def _check_unset_attribute(attribute_name: str, attribute_value: Any): ) +# TODO: itertools.cycle? class GeneratorMemo(Generic[T]): def __init__(self, generator: Generator[T, None, None]): self._generator: Optional[Generator[T, None, None]] = generator @@ -385,7 +386,7 @@ def __len__(self) -> int: :return: The number of experiences in this stream. """ - pass + ... 
class SequenceCLStream(SizedCLStream[TCLExperience], Sequence[TCLExperience], ABC): diff --git a/avalanche/benchmarks/scenarios/online.py b/avalanche/benchmarks/scenarios/online.py index 1e51dfadc..4faafac7a 100644 --- a/avalanche/benchmarks/scenarios/online.py +++ b/avalanche/benchmarks/scenarios/online.py @@ -46,6 +46,7 @@ class CyclicSampler(Sampler): """Samples elements from [0,..,len(dataset)-1] in a cyclic manner.""" def __init__(self, n_samples, shuffle=True, rng=None): + super().__init__() self.n_samples = n_samples self.rng = rng self.shuffle = shuffle diff --git a/avalanche/benchmarks/scenarios/task_aware.py b/avalanche/benchmarks/scenarios/task_aware.py index cd3d07508..824e4fdd1 100644 --- a/avalanche/benchmarks/scenarios/task_aware.py +++ b/avalanche/benchmarks/scenarios/task_aware.py @@ -124,7 +124,7 @@ def task_incremental_benchmark(bm: CLScenario, reset_task_labels=False) -> CLSce with_task_labels(benchmark_from_datasets(**dataset_streams) - :param **dataset_streams: keys are stream names, values are list of datasets. + :param dataset_streams: keys are stream names, values are list of datasets. :param reset_task_labels: whether existing task labels should be ignored. If False (default) if any dataset has task labels the function will raise a ValueError. If `True`, it will reset task labels. From 14865737414fb33051fc62f2a75dc8f166d853b6 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Thu, 6 Jun 2024 22:46:12 +0200 Subject: [PATCH 24/30] [benchmarks] typing --- .../benchmarks/scenarios/dataset_scenario.py | 7 +++---- avalanche/benchmarks/scenarios/supervised.py | 16 ++++++-------- .../benchmarks/utils/dataset_definitions.py | 21 +++++++++---------- 3 files changed, 19 insertions(+), 25 deletions(-) diff --git a/avalanche/benchmarks/scenarios/dataset_scenario.py b/avalanche/benchmarks/scenarios/dataset_scenario.py index 8c5955ba4..e7ad6a897 100644 --- a/avalanche/benchmarks/scenarios/dataset_scenario.py +++ b/avalanche/benchmarks/scenarios/dataset_scenario.py @@ -73,13 +73,12 @@ def __init__( self, *, dataset: TCLDataset, current_experience: Optional[int] = None ): super().__init__(current_experience=current_experience, origin_stream=None) - self._dataset: AvalancheDataset = dataset + self._dataset = dataset @property - def dataset(self) -> AvalancheDataset: + def dataset(self) -> TCLDataset: # dataset is a read-only property - data = self._dataset - return data + return self._dataset def _split_dataset_by_attribute( diff --git a/avalanche/benchmarks/scenarios/supervised.py b/avalanche/benchmarks/scenarios/supervised.py index ac9c7d115..b01dc6a1f 100644 --- a/avalanche/benchmarks/scenarios/supervised.py +++ b/avalanche/benchmarks/scenarios/supervised.py @@ -11,6 +11,7 @@ """High-level benchmark generators for supervised scenarios such as class-incremental.""" import warnings +from collections.abc import Collection from copy import copy from typing import ( Iterable, @@ -22,6 +23,7 @@ ) import torch +from typing_extensions import Self from avalanche.benchmarks.utils.classification_dataset import ( ClassificationDataset, @@ -30,7 +32,7 @@ from avalanche.benchmarks.utils.data import AvalancheDataset from avalanche.benchmarks.utils.data_attribute import DataAttribute from .dataset_scenario import _split_dataset_by_attribute, DatasetExperience -from .generic_scenario import CLScenario, CLStream, EagerCLStream +from .generic_scenario import CLScenario, CLStream, EagerCLStream, CLExperience def class_incremental_benchmark( @@ -40,7 +42,7 @@ def class_incremental_benchmark( 
num_experiences: Optional[int] = None, num_classes_per_exp: Optional[Sequence[int]] = None, seed: Optional[int] = None, -) -> CLScenario[EagerCLStream[DatasetExperience]]: +) -> CLScenario[EagerCLStream[DatasetExperience[ClassificationDataset]]]: """Splits datasets according to a class-incremental scenario. Each dataset will create a stream with the same class order. @@ -120,7 +122,7 @@ def class_incremental_benchmark( num_curr += num_classes # create the streams using class_order to split the data - streams = [] + streams: List[EagerCLStream[DatasetExperience[ClassificationDataset]]] = [] for name, dd in datasets_dict.items(): curr_stream = [] data_by_class = _split_dataset_by_attribute(dd, "targets") @@ -339,12 +341,6 @@ def new_instances_benchmark( return CLScenario(streams=[train_stream, test_stream]) -__all__ = [ - "class_incremental_benchmark", - "new_instances_benchmark", -] - - class ClassesTimeline(Protocol): """Experience decorator that provides info about classes occurrence over time.""" @@ -381,7 +377,7 @@ def _decorate_benchmark(obj: CLScenario): new_streams.append(_decorate_stream(s)) return CLScenario(new_streams) - def _decorate_stream(obj: CLStream): + def _decorate_stream(obj: CLStream[DatasetExperience[ClassificationDataset]]): # TODO: support stream generators. Should return a new generators which applies # foo_decorate_exp every time a new experience is generated. new_stream = [] diff --git a/avalanche/benchmarks/utils/dataset_definitions.py b/avalanche/benchmarks/utils/dataset_definitions.py index 0dc63d9da..11187ca92 100644 --- a/avalanche/benchmarks/utils/dataset_definitions.py +++ b/avalanche/benchmarks/utils/dataset_definitions.py @@ -111,11 +111,13 @@ class IClassificationDataset(IDatasetWithTargets[T_co, int], Protocol): protocol see :class:`ISupportedClassificationDataset`. """ - targets: Sequence[int] - """ - A sequence of ints describing the label of each pattern contained in the - dataset. - """ + @property + def targets(self) -> Sequence[int]: + """ + A sequence of ints describing the label of each pattern contained in the + dataset. + """ + ... class ClassificationDataset(IClassificationDataset[T_co], Dataset): @@ -126,12 +128,9 @@ class ClassificationDataset(IClassificationDataset[T_co], Dataset): The actual value of the targets field should be set by the child class. """ - def __init__(self): - self.targets = [] - """ - A sequence of ints describing the label of each pattern contained in the - dataset. - """ + @property + def targets(self) -> Sequence[int]: + return [] __all__ = [ From 7cc252f56a3ab67a4d5e8b255927e8b0fece165c Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Fri, 7 Jun 2024 10:39:47 +0200 Subject: [PATCH 25/30] [benchmarks] typing --- avalanche/benchmarks/utils/classification_dataset.py | 9 ++++----- avalanche/benchmarks/utils/utils.py | 4 +++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/avalanche/benchmarks/utils/classification_dataset.py b/avalanche/benchmarks/utils/classification_dataset.py index 0da356f1c..c781da8ca 100644 --- a/avalanche/benchmarks/utils/classification_dataset.py +++ b/avalanche/benchmarks/utils/classification_dataset.py @@ -16,7 +16,6 @@ labels automatically. Concatenation and subsampling operations are optimized to be used frequently, as is common in replay strategies. 
""" - from functools import partial from typing import ( List, @@ -29,7 +28,7 @@ Dict, Tuple, Mapping, - overload, + overload, Self, ) import torch @@ -64,11 +63,11 @@ ) T_co = TypeVar("T_co", covariant=True) -TAvalancheDataset = TypeVar("TAvalancheDataset", bound="AvalancheDataset") +TAvalancheDataset = TypeVar("TAvalancheDataset", bound=AvalancheDataset) TTargetType = int TClassificationDataset = TypeVar( - "TClassificationDataset", bound="ClassificationDataset" + "TClassificationDataset", bound=IDatasetWithTargets ) @@ -114,7 +113,7 @@ def task_pattern_indices(self) -> Dict[int, Sequence[int]]: return self.targets_task_labels.val_to_idx # type: ignore @property - def task_set(self: TClassificationDataset) -> TaskSet[TClassificationDataset]: + def task_set(self) -> TaskSet[Self]: """Returns the dataset's ``TaskSet``, which is a mapping .""" return TaskSet(self) diff --git a/avalanche/benchmarks/utils/utils.py b/avalanche/benchmarks/utils/utils.py index 1f206f028..26e3797ef 100644 --- a/avalanche/benchmarks/utils/utils.py +++ b/avalanche/benchmarks/utils/utils.py @@ -653,13 +653,15 @@ class TaskSet(Mapping[int, TAvalancheDataset], Generic[TAvalancheDataset]): """ + data: TAvalancheDataset + def __init__(self, data: TAvalancheDataset): """Constructor. :param data: original data """ super().__init__() - self.data: TAvalancheDataset = data + self.data = data def __iter__(self) -> Iterator[int]: t_labels = self._get_task_labels_field() From 65ba3d44ad37023adf2573fa23823ceb6447f708 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Tue, 11 Jun 2024 22:39:09 +0200 Subject: [PATCH 26/30] [training] fix typing and typo in SCR --- .../training/supervised/supervised_contrastive_replay.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/avalanche/training/supervised/supervised_contrastive_replay.py b/avalanche/training/supervised/supervised_contrastive_replay.py index f4dd44a0a..ddff093dd 100644 --- a/avalanche/training/supervised/supervised_contrastive_replay.py +++ b/avalanche/training/supervised/supervised_contrastive_replay.py @@ -24,7 +24,7 @@ class SCR(SupervisedTemplate): embeddings produced by the encoder. Accuracy cannot be monitored during training (no NCM classifier). - During training, NCRLoss is monitored, while during eval + During training, SCRLoss is monitored, while during eval CrossEntropyLoss is monitored. The original paper uses an additional fine-tuning phase on the buffer @@ -39,7 +39,7 @@ def __init__( optimizer: Optimizer, augmentations=Compose([Lambda(lambda el: el)]), mem_size: int = 100, - temperature: int = 0.1, + temperature: float = 0.1, train_mb_size: int = 1, batch_size_mem: int = 100, train_epochs: int = 1, From c19558b613e6226ab95026baf4c939299ef37458 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Fri, 14 Jun 2024 23:07:01 +0200 Subject: [PATCH 27/30] [evaluation] typo, [training] better error message --- avalanche/evaluation/metrics/accuracy.py | 2 +- avalanche/training/losses.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/avalanche/evaluation/metrics/accuracy.py b/avalanche/evaluation/metrics/accuracy.py index e745bcf36..c41123359 100644 --- a/avalanche/evaluation/metrics/accuracy.py +++ b/avalanche/evaluation/metrics/accuracy.py @@ -38,7 +38,7 @@ class Accuracy(Metric[float]): def __init__(self): """Creates an instance of the standalone Accuracy metric. - By default this metric in its initial state will return an accuracy + By default, this metric in its initial state will return an accuracy value of 0. 
The metric can be updated by using the `update` method while the running accuracy can be retrieved using the `result` method. """ diff --git a/avalanche/training/losses.py b/avalanche/training/losses.py index 699ccd1d6..4f6b61855 100644 --- a/avalanche/training/losses.py +++ b/avalanche/training/losses.py @@ -114,7 +114,10 @@ def forward(self, features, labels=None, mask=None): elif labels is not None: labels = labels.contiguous().view(-1, 1) if labels.shape[0] != batch_size: - raise ValueError("Num of labels does not match num of features") + raise ValueError( + f"Num of labels {labels.shape[0]} does not match " + f"num of features {batch_size}" + ) mask = torch.eq(labels, labels.T).float().to(device) else: mask = mask.float().to(device) From d08c64241e83552944dce9888e722db17a75f901 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Fri, 14 Jun 2024 23:11:47 +0200 Subject: [PATCH 28/30] [training] typing (optional evaluator, remove redundant generic + protocol), export BaseStrategyProtocol --- .../supervised/supervised_contrastive_replay.py | 2 +- avalanche/training/templates/base_sgd.py | 6 +++++- avalanche/training/templates/common_templates.py | 2 +- .../training/templates/strategy_mixin_protocol.py | 4 ++-- avalanche/training/utils.py | 15 +++++++++------ 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/avalanche/training/supervised/supervised_contrastive_replay.py b/avalanche/training/supervised/supervised_contrastive_replay.py index ddff093dd..d1936b927 100644 --- a/avalanche/training/supervised/supervised_contrastive_replay.py +++ b/avalanche/training/supervised/supervised_contrastive_replay.py @@ -37,7 +37,7 @@ def __init__( *, model: SCRModel, optimizer: Optimizer, - augmentations=Compose([Lambda(lambda el: el)]), + augmentations=Lambda(lambda el: el), mem_size: int = 100, temperature: float = 0.1, train_mb_size: int = 1, diff --git a/avalanche/training/templates/base_sgd.py b/avalanche/training/templates/base_sgd.py index 17c46d3c7..40871ff14 100644 --- a/avalanche/training/templates/base_sgd.py +++ b/avalanche/training/templates/base_sgd.py @@ -64,6 +64,10 @@ def __init__( *, model: Module, optimizer: Optimizer, + # TODO: Make optional in base classes as subclasses may choose to implement + # `def criterion()` that doesn't depend on `self._criterion` + # (which is set in __init__). 
Subclasses using `_criterion` in `criterion()`
+        # should then make the `criterion` kwarg mandatory.
         criterion: CriterionType = CrossEntropyLoss(),
         train_mb_size: int = 1,
         train_epochs: int = 1,
@@ -71,7 +75,7 @@ def __init__(
         device: Union[str, torch.device] = "cpu",
         plugins: Optional[Sequence[BasePlugin]] = None,
         evaluator: Union[
-            EvaluationPlugin, Callable[[], EvaluationPlugin]
+            EvaluationPlugin, Callable[[], EvaluationPlugin], None,
         ] = default_evaluator,
         eval_every=-1,
         peval_mode="epoch",
diff --git a/avalanche/training/templates/common_templates.py b/avalanche/training/templates/common_templates.py
index ec131b015..744822a1b 100644
--- a/avalanche/training/templates/common_templates.py
+++ b/avalanche/training/templates/common_templates.py
@@ -92,7 +92,7 @@ def __init__(
         device: Union[str, torch.device] = "cpu",
         plugins: Optional[Sequence[BasePlugin]] = None,
         evaluator: Union[
-            EvaluationPlugin, Callable[[], EvaluationPlugin]
+            EvaluationPlugin, Callable[[], EvaluationPlugin], None,
        ] = default_evaluator,
         eval_every=-1,
         peval_mode="epoch",
diff --git a/avalanche/training/templates/strategy_mixin_protocol.py b/avalanche/training/templates/strategy_mixin_protocol.py
index 4596c9d39..d5b9c6c3a 100644
--- a/avalanche/training/templates/strategy_mixin_protocol.py
+++ b/avalanche/training/templates/strategy_mixin_protocol.py
@@ -21,7 +21,7 @@
 CriterionType: TypeAlias = Union[Module, Callable[[Tensor, Tensor], Tensor]]
 
 
-class BaseStrategyProtocol(Generic[TExperienceType], Protocol[TExperienceType]):
+class BaseStrategyProtocol(Protocol[TExperienceType]):
     model: Module
 
     device: torch.device
@@ -36,7 +36,6 @@
 
 
 class SGDStrategyProtocol(
-    Generic[TSGDExperienceType, TMBInput, TMBOutput],
     BaseStrategyProtocol[TSGDExperienceType],
     Protocol[TSGDExperienceType, TMBInput, TMBOutput],
 ):
@@ -120,6 +119,7 @@ def _after_outer_update(self, **kwargs): ... 
__all__ = [ + "BaseStrategyProtocol", "SGDStrategyProtocol", "SupervisedStrategyProtocol", "MetaLearningStrategyProtocol", diff --git a/avalanche/training/utils.py b/avalanche/training/utils.py index 1427f4760..37af0b009 100644 --- a/avalanche/training/utils.py +++ b/avalanche/training/utils.py @@ -15,7 +15,8 @@ """ from collections import defaultdict -from typing import Callable, Dict, List, NamedTuple, Optional, Tuple, Union +from typing import Callable, Dict, List, NamedTuple, Optional, Tuple, Union, \ + TYPE_CHECKING import torch from torch import Tensor @@ -25,6 +26,9 @@ from avalanche.benchmarks import OnlineCLExperience from avalanche.models.batch_renorm import BatchRenorm2D +if TYPE_CHECKING: + from avalanche.training.templates.strategy_mixin_protocol import BaseStrategyProtocol + def _at_task_boundary(training_experience, before=True) -> bool: """ @@ -53,6 +57,8 @@ def _at_task_boundary(training_experience, before=True) -> bool: return True elif (not before) and training_experience.is_last_subexp: return True + else: + return False else: return True else: @@ -65,11 +71,8 @@ def cycle(loader): yield batch -def trigger_plugins(strategy, event, **kwargs): - """Call plugins on a specific callback - - :return: - """ +def trigger_plugins(strategy: "BaseStrategyProtocol", event: str, **kwargs): + """Call plugins on a specific callback""" for p in strategy.plugins: if hasattr(p, event): getattr(p, event)(strategy, **kwargs) From c5c635dfca23852515054ffb7cb98e1518f26ae1 Mon Sep 17 00:00:00 2001 From: Jim Neuendorf Date: Thu, 20 Jun 2024 12:22:40 +0200 Subject: [PATCH 29/30] typo, documentation --- avalanche/checkpointing/checkpoint.py | 9 +++++---- .../training/supervised/supervised_contrastive_replay.py | 3 ++- avalanche/training/templates/strategy_mixin_protocol.py | 2 ++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/avalanche/checkpointing/checkpoint.py b/avalanche/checkpointing/checkpoint.py index fbf6edf39..14de6d029 100644 --- a/avalanche/checkpointing/checkpoint.py +++ b/avalanche/checkpointing/checkpoint.py @@ -1,6 +1,7 @@ import os.path -from pathlib import Path from copy import copy +from functools import partial +from pathlib import Path from typing import ( Any, Callable, @@ -14,12 +15,12 @@ Union, Collection, ) -from typing_extensions import TypeAlias import dill import torch -from functools import partial from packaging.version import parse +from typing_extensions import TypeAlias + from .checkpoint_internals import ( CHECKPOINT_MECHANISM_VERSION, _CheckpointLoadingContext, @@ -119,7 +120,7 @@ def maybe_load_checkpoint( The method returns the strategy with the state deserialized from the file and the index of the training experience to resume training. - If the file does not exists, the method returns the strategy unmodified + If the file does not exist, the method returns the strategy unmodified and the index 0. As a result, the method can be safely called even if no checkpoint has been previously created (e.g. during the first run). 
 
diff --git a/avalanche/training/supervised/supervised_contrastive_replay.py b/avalanche/training/supervised/supervised_contrastive_replay.py
index d1936b927..f3fa10217 100644
--- a/avalanche/training/supervised/supervised_contrastive_replay.py
+++ b/avalanche/training/supervised/supervised_contrastive_replay.py
@@ -112,10 +112,11 @@ def __init__(
             plugins = [self.replay_plugin] + plugins
         else:
             raise ValueError("`plugins` parameter needs to be a list.")
+
         super().__init__(
             model=model,
             optimizer=optimizer,
-            criterion=SCRLoss(temperature=self.temperature),
+            # criterion omitted: SCR overrides criterion() below (SCRLoss/CE)
             train_mb_size=train_mb_size,
             train_epochs=train_epochs,
             eval_mb_size=eval_mb_size,
diff --git a/avalanche/training/templates/strategy_mixin_protocol.py b/avalanche/training/templates/strategy_mixin_protocol.py
index d5b9c6c3a..a2ec0e4e5 100644
--- a/avalanche/training/templates/strategy_mixin_protocol.py
+++ b/avalanche/training/templates/strategy_mixin_protocol.py
@@ -46,6 +46,7 @@ class SGDStrategyProtocol(
 
     mbatch: Optional[TMBInput]
     mb_output: Optional[TMBOutput]
+    """Mini-batch output (typically the result of `self.forward()`)."""
 
     dataloader: Iterable[TMBInput]
 
@@ -94,6 +95,7 @@ class SupervisedStrategyProtocol(
     SGDStrategyProtocol[TSGDExperienceType, TMBInput, TMBOutput],
     Protocol[TSGDExperienceType, TMBInput, TMBOutput],
 ):
+    # Note: mirrors mbatch[0]; SupervisedTemplate exposes mb_x as a property.
     mb_x: Tensor
 
     mb_y: Tensor

From f7a4934032cb70f5c1defb6c82b9721e1a8c1f1d Mon Sep 17 00:00:00 2001
From: Jim Neuendorf
Date: Thu, 12 Sep 2024 21:41:38 +0200
Subject: [PATCH 30/30] [training] add debug prints to contrastive learning

---
 avalanche/evaluation/metrics/accuracy.py          | 10 ++++++++++
 .../supervised_contrastive_replay.py              | 18 +++++++++++++---
 avalanche/training/utils.py                       |  1 +
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/avalanche/evaluation/metrics/accuracy.py b/avalanche/evaluation/metrics/accuracy.py
index c41123359..6c1ce0c18 100644
--- a/avalanche/evaluation/metrics/accuracy.py
+++ b/avalanche/evaluation/metrics/accuracy.py
@@ -60,6 +60,9 @@ def update(
 
        :return: None. 
""" + # print('[Accuracy] update') + # print(' true_y', true_y.shape) # [256] + # print(' predicted_y', predicted_y.shape) # [256, 2, 128] true_y = torch.as_tensor(true_y) predicted_y = torch.as_tensor(predicted_y) @@ -75,6 +78,9 @@ def update( # Logits -> transform to labels true_y = torch.max(true_y, 1)[1] + # print(' true_y 2', true_y.shape) # [256] + # print(' predicted_y 2', predicted_y.shape) # [256, 128] + true_positives = float(torch.sum(torch.eq(predicted_y, true_y))) total_patterns = len(true_y) self._mean_accuracy.update(true_positives / total_patterns, total_patterns) @@ -218,6 +224,10 @@ def result(self) -> float: return self._metric.result() def update(self, strategy): + # print('[AccuracyPluginMetric] update]') + # print(' strategy:', strategy) + # print(' mb_output', strategy.mb_output.shape) + # print(' mb_y', strategy.mb_y.shape) self._metric.update(strategy.mb_output, strategy.mb_y) diff --git a/avalanche/training/supervised/supervised_contrastive_replay.py b/avalanche/training/supervised/supervised_contrastive_replay.py index f3fa10217..44474eea4 100644 --- a/avalanche/training/supervised/supervised_contrastive_replay.py +++ b/avalanche/training/supervised/supervised_contrastive_replay.py @@ -1,7 +1,6 @@ from typing import Optional, Sequence import torch -from torch.nn import Module from torch.optim import Optimizer from torch.utils.data import DataLoader from torchvision.transforms import Compose, Lambda @@ -129,6 +128,9 @@ def __init__( def criterion(self): if self.is_training: + # print(self.train_loss) + # print('mb_output', self.mb_output.shape) # [384, 2, 128] + # print('mb_y', self.mb_y.shape) # [256] return self.train_loss(self.mb_output, self.mb_y) else: return self.eval_loss(self.mb_output, self.mb_y) @@ -139,9 +141,17 @@ def _before_forward(self, **kwargs): """ assert self.is_training super()._before_forward(**kwargs) - mb_x_augmented = self.augmentations(self.mbatch[0]) + # print('mbatch', len(self.mbatch), self.mbatch) + mb_x_augmented = self.augmentations(self.mb_x) + # print() + # print('before forward') + # print('x', self.mb_x.shape) # [256, 1, 28, 28] + # print('y', self.mb_y.shape) # [256] + # print('mb_x_augmented', mb_x_augmented.shape) # [512, 1, 28, 28] + # (batch_size*2, input_size) - self.mbatch[0] = torch.cat([self.mbatch[0], mb_x_augmented], dim=0) + self.mbatch[0] = torch.cat([self.mb_x, mb_x_augmented], dim=0) + # print('~x', self.mb_x.shape) # [768, 1, 28, 28] def _after_forward(self, **kwargs): """ @@ -152,10 +162,12 @@ def _after_forward(self, **kwargs): super()._after_forward(**kwargs) assert self.mb_output.size(0) % 2 == 0 original_batch_size = int(self.mb_output.size(0) / 2) + # print('[after forward] mb_output 1:', self.mb_output.shape) original_examples = self.mb_output[:original_batch_size] augmented_examples = self.mb_output[original_batch_size:] # (original_batch_size, 2, output_size) self.mb_output = torch.stack([original_examples, augmented_examples], dim=1) + # print('[after forward] mb_output 2:', self.mb_output.shape) def _after_training_exp(self, **kwargs): """Update NCM means""" diff --git a/avalanche/training/utils.py b/avalanche/training/utils.py index 37af0b009..a3d6e9ca3 100644 --- a/avalanche/training/utils.py +++ b/avalanche/training/utils.py @@ -75,6 +75,7 @@ def trigger_plugins(strategy: "BaseStrategyProtocol", event: str, **kwargs): """Call plugins on a specific callback""" for p in strategy.plugins: if hasattr(p, event): + # print('triggering plugin', p, event) getattr(p, event)(strategy, **kwargs)