Skip to content

Commit 93b48eb

Browse files
authored
Merge pull request #325 from JdeRobot/issue-323
Issue 323 - Changes made in preprocessing images for torch detection models.
2 parents 3b1aaab + ada0a05 commit 93b48eb

File tree

2 files changed: +36 −16 lines changed

app.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,6 @@ def browse_folder():
187187
"Confidence Threshold",
188188
min_value=0.0,
189189
max_value=1.0,
190-
value=st.session_state.get("confidence_threshold", 0.5),
191190
step=0.01,
192191
key="confidence_threshold",
193192
help="Minimum confidence score for detections",
@@ -196,7 +195,6 @@ def browse_folder():
196195
"NMS Threshold",
197196
min_value=0.0,
198197
max_value=1.0,
199-
value=st.session_state.get("nms_threshold", 0.5),
200198
step=0.01,
201199
key="nms_threshold",
202200
help="Non-maximum suppression threshold",
@@ -205,15 +203,22 @@ def browse_folder():
205203
"Max Detections/Image",
206204
min_value=1,
207205
max_value=1000,
208-
value=st.session_state.get("max_detections", 100),
209206
step=1,
210207
key="max_detections",
211208
)
209+
st.number_input(
210+
"Image Resize Height",
211+
min_value=1,
212+
max_value=4096,
213+
value=640,
214+
step=1,
215+
key="resize_height",
216+
help="Height to resize images for inference",
217+
)
212218
with col2:
213219
st.selectbox(
214220
"Device",
215221
["cpu", "cuda", "mps"],
216-
index=0 if st.session_state.get("device", "cpu") == "cpu" else 1,
217222
key="device",
218223
)
219224
st.selectbox(
@@ -231,20 +236,26 @@ def browse_folder():
231236
"Batch Size",
232237
min_value=1,
233238
max_value=256,
234-
value=st.session_state.get("batch_size", 1),
235239
step=1,
236240
key="batch_size",
237241
)
238242
st.number_input(
239243
"Evaluation Step",
240244
min_value=0,
241245
max_value=1000,
242-
value=st.session_state.get("evaluation_step", 10),
243246
step=1,
244247
key="evaluation_step",
245248
help="Update UI with intermediate metrics every N images (0 = disable intermediate updates)",
246249
)
247-
250+
st.number_input(
251+
"Image Resize Width",
252+
min_value=1,
253+
max_value=4096,
254+
value=640,
255+
step=1,
256+
key="resize_width",
257+
help="Width to resize images for inference",
258+
)
248259
# Load model action in sidebar
249260
from detectionmetrics.models.torch_detection import TorchImageDetectionModel
250261
import json, tempfile
@@ -292,6 +303,8 @@ def browse_folder():
292303
device = st.session_state.get("device", "cpu")
293304
batch_size = int(st.session_state.get("batch_size", 1))
294305
evaluation_step = int(st.session_state.get("evaluation_step", 5))
306+
resize_height = int(st.session_state.get("resize_height", 640))
307+
resize_width = int(st.session_state.get("resize_width", 640))
295308
model_format = st.session_state.get("model_format", "torchvision")
296309
config_data = {
297310
"confidence_threshold": confidence_threshold,
@@ -300,6 +313,8 @@ def browse_folder():
300313
"device": device,
301314
"batch_size": batch_size,
302315
"evaluation_step": evaluation_step,
316+
"resize_height": resize_height,
317+
"resize_width": resize_width,
303318
"model_format": model_format.lower(),
304319
}
305320
with tempfile.NamedTemporaryFile(

detectionmetrics/models/torch_detection.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -260,16 +260,21 @@ def __init__(
260260
# Build input transforms (resize, normalize, etc.)
261261
self.transform_input = []
262262

263+
# Default resize to 640x640 if not specified
263264
if "resize" in self.model_cfg:
264-
self.transform_input += [
265-
transforms.Resize(
266-
size=(
267-
self.model_cfg["resize"].get("height", None),
268-
self.model_cfg["resize"].get("width", None),
269-
),
270-
interpolation=transforms.InterpolationMode.BILINEAR,
271-
)
272-
]
265+
resize_height = self.model_cfg["resize"].get("height", 640)
266+
resize_width = self.model_cfg["resize"].get("width", 640)
267+
else:
268+
# Default to 640x640 when no resize is specified
269+
resize_height = 640
270+
resize_width = 640
271+
272+
self.transform_input += [
273+
transforms.Resize(
274+
size=(resize_height, resize_width),
275+
interpolation=transforms.InterpolationMode.BILINEAR,
276+
)
277+
]
273278

274279
if "crop" in self.model_cfg:
275280
crop_size = (

0 commit comments

Comments (0)