Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
2868e70
[add] support for action outputs of type dict
ANarayan Oct 29, 2021
2a11c33
[add] merge_objects function
ANarayan Oct 29, 2021
bcb2a30
[format] imports
ANarayan Oct 29, 2021
38b30c9
[format] imports
ANarayan Oct 29, 2021
cc214a0
[add] tests for merge_objects
ANarayan Oct 29, 2021
8ea9b2c
[reformat] utils.py
ANarayan Oct 29, 2021
5134f8a
[add] return type for merge_objects
ANarayan Oct 29, 2021
af11025
[add] docstring to merge_objects
ANarayan Oct 29, 2021
aeda146
[add] docstring to merge_objects
ANarayan Oct 29, 2021
73643ea
[add] change docstring mood in merge_objects
ANarayan Oct 29, 2021
e548b77
[reformat]
ANarayan Oct 29, 2021
2ffebb7
[reformat] and add comment
ANarayan Oct 29, 2021
dc427ce
[fix] merge objects utils and check for empty value
ANarayan Oct 29, 2021
f1af3cc
[merge] w/upstream master
ANarayan Oct 29, 2021
b873a57
[merge] w/upstream master
ANarayan Oct 29, 2021
fccc45c
[fix] merge_objects np.array test and merge of list
ANarayan Oct 29, 2021
1a1a6a3
[add] test for tuple type in merge objects
ANarayan Oct 29, 2021
668271d
[add] fix input type
ANarayan Oct 29, 2021
3f2b6e7
[add] fix input type
ANarayan Oct 29, 2021
aeb61f8
[add] fix input type
ANarayan Oct 30, 2021
a9a2588
[fix] type mismatch
ANarayan Oct 30, 2021
94a748f
[fix] type mismatch
ANarayan Oct 30, 2021
d2efc22
[fix] type mismatch
ANarayan Oct 30, 2021
4162e32
change move_object logic out of if/else clause in forward call
ANarayan Oct 30, 2021
5c7e2de
[add] more tests for array and tensor, and fix 1D merge edge case
ANarayan Oct 30, 2021
0a9fd2d
[add] detach and numpy convert to move_to_device function
ANarayan Nov 1, 2021
edd56dd
[add] e2e test for outputs which are dicts
ANarayan Nov 1, 2021
2010890
[fix] bug in recursive move_to_device calls
ANarayan Nov 1, 2021
7f76dbf
[fix] add more tests for coverage, and add types func def
ANarayan Nov 2, 2021
044ed8c
[fix] model action output dict assignment
ANarayan Nov 3, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions src/emmental/contrib/slicing/modules/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ def ce_loss(
Loss.
"""
return F.cross_entropy(
intermediate_output_dict[module_name][0],
Y.view(-1) - 1,
weight,
intermediate_output_dict[module_name][0], Y.view(-1) - 1, weight
)


Expand Down
9 changes: 2 additions & 7 deletions src/emmental/logging/checkpointer.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,9 +137,7 @@ def checkpoint(

# Save optimizer state
optimizer_path = f"{self.checkpoint_path}/checkpoint_{iteration}.optimizer.pth"
optimizer_dict = {
"optimizer": optimizer.state_dict(),
}
optimizer_dict = {"optimizer": optimizer.state_dict()}
torch.save(optimizer_dict, optimizer_path)

# Save lr_scheduler state
Expand All @@ -165,10 +163,7 @@ def checkpoint(
f"{self.checkpoint_path}/best_model_"
f"{metric.replace('/', '_')}.model.pth"
)
copyfile(
model_path,
best_metric_model_path,
)
copyfile(model_path, best_metric_model_path)
logger.info(
f"Save best model of metric {metric} to {best_metric_model_path}"
)
Expand Down
5 changes: 1 addition & 4 deletions src/emmental/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,10 +251,7 @@ def check_config() -> None:
Meta.config["logging_config"]["evaluation_freq"] = new_evaluation_freq

if (
Meta.config["logging_config"]["counter_unit"]
in [
"epoch",
]
Meta.config["logging_config"]["counter_unit"] in ["epoch"]
and isinstance(Meta.config["logging_config"]["evaluation_freq"], int)
and Meta.config["logging_config"]["writer_config"]["write_loss_per_step"]
):
Expand Down
35 changes: 17 additions & 18 deletions src/emmental/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from emmental.utils.utils import (
array_to_numpy,
construct_identifier,
merge_objects,
move_to_device,
prob_to_pred,
)
Expand Down Expand Up @@ -323,7 +324,7 @@ def forward( # type: ignore
Dict[str, Tensor],
Dict[str, Union[ndarray, List[ndarray]]],
Dict[str, Union[ndarray, List[ndarray]]],
Dict[str, Dict[str, Union[ndarray, List]]],
Dict[str, Dict[str, Union[ndarray, List, int, float, Dict]]],
],
Tuple[
Dict[str, List[str]],
Expand Down Expand Up @@ -356,7 +357,7 @@ def forward( # type: ignore
prob_dict: Dict[str, Union[ndarray, List[ndarray]]] = (
defaultdict(list) if return_probs else None
)
out_dict: Dict[str, Dict[str, Union[ndarray, List]]] = (
out_dict: Dict[str, Dict[str, Union[ndarray, List, int, float, Dict]]] = (
defaultdict(lambda: defaultdict(list)) if return_action_outputs else None
)

Expand All @@ -378,8 +379,7 @@ def forward( # type: ignore
loss_dict[task_name] = self.loss_funcs[task_name](
output_dict,
move_to_device(
Y_dict[label_name],
Meta.config["model_config"]["device"],
Y_dict[label_name], Meta.config["model_config"]["device"]
)
if Y_dict is not None and label_name is not None
else None,
Expand All @@ -403,9 +403,11 @@ def forward( # type: ignore
and self.action_outputs[task_name] is not None
):
for action_name, output_index in self.action_outputs[task_name]:
out_dict[task_name][f"{action_name}_{output_index}"] = (
output_dict[action_name][output_index].cpu().detach().numpy()
action_output = output_dict[action_name][output_index]
action_output = move_to_device(
action_output, -1, detach=True, convert_to_numpy=True
)
out_dict[task_name][f"{action_name}_{output_index}"] = action_output

if return_action_outputs:
return uid_dict, loss_dict, prob_dict, gold_dict, out_dict
Expand Down Expand Up @@ -446,7 +448,7 @@ def predict(
pred_dict: Dict[str, Union[ndarray, List[ndarray]]] = (
defaultdict(list) if return_preds else None
)
out_dict: Dict[str, Dict[str, List[Union[ndarray, int, float]]]] = (
out_dict: Dict[str, Dict[str, Union[ndarray, List, int, float, Dict]]] = (
defaultdict(lambda: defaultdict(list)) if return_action_outputs else None
)
loss_dict: Dict[str, Union[ndarray, float]] = (
Expand Down Expand Up @@ -526,8 +528,13 @@ def predict(
if return_action_outputs and out_bdict:
for task_name in out_bdict.keys():
for action_name in out_bdict[task_name].keys():
out_dict[task_name][action_name].extend(
out_dict[task_name][action_name] = (
out_bdict[task_name][action_name]
if out_dict[task_name][action_name] == []
else merge_objects(
out_dict[task_name][action_name],
out_bdict[task_name][action_name],
)
)

# Calculate average loss
Expand All @@ -536,11 +543,7 @@ def predict(
if not isinstance(loss_dict[task_name], list):
loss_dict[task_name] /= len(uid_dict[task_name])

res = {
"uids": uid_dict,
"golds": gold_dict,
"losses": loss_dict,
}
res = {"uids": uid_dict, "golds": gold_dict, "losses": loss_dict}

if return_probs:
for task_name in prob_dict.keys():
Expand Down Expand Up @@ -734,11 +737,7 @@ def save(
if Meta.config["meta_config"]["verbose"] and verbose:
logger.info(f"[{self.name}] Model saved in {model_path}")

def load(
self,
model_path: str,
verbose: bool = True,
) -> None:
def load(self, model_path: str, verbose: bool = True) -> None:
"""Load model state_dict from file and reinitialize the model weights.

Args:
Expand Down
5 changes: 1 addition & 4 deletions src/emmental/utils/parse_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,10 +819,7 @@ def parse_args(parser: Optional[ArgumentParser] = None) -> ArgumentParser:
)

logging_config.add_argument(
"--wandb_run_name",
type=nullable_string,
default=None,
help="Wandb run name",
"--wandb_run_name", type=nullable_string, default=None, help="Wandb run name"
)

logging_config.add_argument(
Expand Down
94 changes: 89 additions & 5 deletions src/emmental/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,10 @@ def pred_to_prob(preds: ndarray, n_classes: int) -> ndarray:


def move_to_device(
obj: Any, device: Optional[Union[int, str, torch.device]] = -1
obj: Union[Tensor, ndarray, dict, list, tuple],
device: Optional[Union[int, str, torch.device]] = -1,
detach: bool = False,
convert_to_numpy: bool = False,
) -> Any:
"""Move object to specified device.

Expand All @@ -147,17 +150,98 @@ def move_to_device(
device = torch.device("cpu")

if isinstance(obj, torch.Tensor):
return obj.to(device)
obj.to(device)
if detach:
obj = obj.detach()
if convert_to_numpy:
obj = obj.numpy()
return obj
elif isinstance(obj, dict):
return {key: move_to_device(value, device) for key, value in obj.items()}
return {
key: move_to_device(value, device, detach, convert_to_numpy)
for key, value in obj.items()
}
elif isinstance(obj, list):
return [move_to_device(item, device) for item in obj]
return [move_to_device(item, device, detach, convert_to_numpy) for item in obj]
elif isinstance(obj, tuple):
return tuple([move_to_device(item, device) for item in obj])
return tuple(
[move_to_device(item, device, detach, convert_to_numpy) for item in obj]
)
else:
return obj


def merge_objects(obj_1: Any, obj_2: Any) -> Any:
"""Merge two objects of the same type.

Given two objects of the same type and structure, merges the second object
into the first object. If either of the objects is empty, the non-empty
object is returned. Supported types include torch tensors, numpy arrays
lists, dicts and tuples. For tensors and arrays, objects are merged
along the 1st dimension:

obj_1: torch.Tensor([1,2]), obj_2: torch.Tensor([2,3])
merged object: torch.Tensor([[1,2],[2,3]])

Args:
obj_1: first object.
obj_2: second object to be merged into the first object.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add more description about this function here?

Returns:
an object reflecting the merged output of the two inputs.
"""
if type(obj_1) != type(obj_2):
raise TypeError(
f"Cannot merge object of type {type(obj_1)} "
f"with object of type {type(obj_2)}."
)
if isinstance(obj_1, torch.Tensor):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need check the two objects are the same type right?

# empty edge case
if not obj_1.size()[0]:
return obj_2
elif not obj_2.size()[0]:
return obj_1

# unsqueeze of object is 1D and not empty
if len(obj_1.shape) == 1:
obj_1 = obj_1.unsqueeze(0)
if len(obj_2.shape) == 1:
obj_2 = obj_2.unsqueeze(0)
return torch.cat([obj_1, obj_2])
elif isinstance(obj_1, np.ndarray):
# empty edge case
if not obj_1.size:
return obj_2
elif not obj_2.size:
return obj_1

# expand if array has 1 dimension
if len(obj_1.shape) == 1:
obj_1 = np.expand_dims(obj_1, axis=0)
if len(obj_2.shape) == 1:
obj_2 = np.expand_dims(obj_2, axis=0)
return np.concatenate((obj_1, obj_2))
elif isinstance(obj_1, list):
obj_1.extend(obj_2)
return obj_1
elif isinstance(obj_1, dict):
if not obj_1:
return obj_2
elif not obj_2:
return obj_1

for key, value in obj_1.items():
obj_1[key] = merge_objects(value, obj_2[key])
return obj_1
elif isinstance(obj_1, tuple):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tuple might have more than 2 objects.

merged_tuple_vals = []
for idx in range(len(obj_1)):
merged_tuple_vals.append(merge_objects(obj_1[idx], obj_2[idx]))
return tuple(merged_tuple_vals)
else:
return obj_1


def array_to_numpy(
array: Union[ndarray, List[Any], Tensor], flatten: bool = False
) -> ndarray:
Expand Down
Loading