microsoft · rlundeen2 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -13,6 +13,11 @@ repos:
         entry: python ./build_scripts/sanitize_notebook_paths.py
         language: python
         files: ^doc.*\.(ipynb)$
+      - id: strip-notebook-progress-bars
+        name: Strip Notebook Progress Bars
+        entry: python ./build_scripts/strip_notebook_progress_bars.py
+        language: python
+        files: ^doc.*\.(ipynb)$
       - id: validate-docs
         name: Validate Documentation Structure
         entry: python ./build_scripts/validate_docs.py

diff --git a/build_scripts/strip_notebook_progress_bars.py b/build_scripts/strip_notebook_progress_bars.py
@@ -0,0 +1,96 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import json
+import re
+import sys
+
+# Regex signatures of tqdm's text-mode progress bars:
+# - the "%|" pair that sits between the percentage and the bar
+# - the block glyphs (━, █, ▏-▉) the bar itself is drawn with
+# tqdm also emits "\r" carriage returns for in-place redraws; those are not
+# matched by any pattern below.
+_TQDM_PATTERNS = [
+    re.compile(regex)
+    for regex in (
+        r"%\|",  # e.g. "  0%|" or " 50%|..."
+        r"[━█▏▎▍▌▋▊▉]",  # partial and full bar block characters
+    )
+]
+
+
+def _is_tqdm_line(line: str) -> bool:
+    """
+    Check if a line is part of a tqdm progress bar output.
+
+    Blank lines and lines made up solely of whitespace (including a bare
+    carriage return) are never reported as tqdm lines.
+
+    Args:
+        line (str): A single line of text from stderr output.
+
+    Returns:
+        bool: True if the line matches tqdm progress bar patterns.
+    """
+    # str.strip() already removes "\r", so a single emptiness check covers both
+    # blank lines and the bare carriage returns found between tqdm updates.
+    if not line.strip():
+        return False
+    return any(pattern.search(line) for pattern in _TQDM_PATTERNS)
+
+
+def strip_notebook_progress_bars(file_path: str) -> bool:
+    """
+    Remove tqdm progress bar outputs from notebook cell stderr streams.
+
+    Strips stderr stream outputs that contain tqdm progress bar patterns.
+    If all lines in a stderr output are tqdm lines (or only whitespace is left
+    once they are removed), the entire output is removed.
+    If only some lines are tqdm, those lines are stripped and the output is kept.
+
+    The notebook format allows a stream's "text" to be either a list of lines
+    or one multi-line string. A string is split into lines before filtering so
+    that it is never processed character by character.
+
+    Args:
+        file_path (str): Path to the .ipynb file.
+
+    Returns:
+        bool: True if the file was modified.
+    """
+    if not file_path.endswith(".ipynb"):
+        return False
+
+    with open(file_path, encoding="utf-8") as f:
+        content = json.load(f)
+
+    modified = False
+
+    for cell in content.get("cells", []):
+        outputs = cell.get("outputs", [])
+        new_outputs = []
+
+        for output in outputs:
+            # Only stderr stream outputs are candidates; everything else is kept as-is.
+            if output.get("output_type") != "stream" or output.get("name") != "stderr":
+                new_outputs.append(output)
+                continue
+
+            text_lines = output.get("text", [])
+            if isinstance(text_lines, str):
+                # Single-string form: split into lines (keeping line endings) so the
+                # per-line filter below does not iterate over individual characters.
+                text_lines = text_lines.splitlines(keepends=True)
+
+            non_tqdm_lines = [line for line in text_lines if not _is_tqdm_line(line)]
+            if len(non_tqdm_lines) == len(text_lines):
+                # Nothing was stripped: leave the output exactly as stored.
+                new_outputs.append(output)
+                continue
+
+            modified = True
+            # Keep output only if there are meaningful non-tqdm lines
+            if any(line.strip() for line in non_tqdm_lines):
+                output["text"] = non_tqdm_lines
+                new_outputs.append(output)
+            # else: drop the entire output (all tqdm or only whitespace left)
+
+        # Outputs edited in place are already reflected in the cell; the list only
+        # needs replacing when whole outputs were dropped.
+        if len(new_outputs) != len(outputs):
+            cell["outputs"] = new_outputs
+
+    if not modified:
+        return False
+
+    with open(file_path, "w", encoding="utf-8") as f:
+        json.dump(content, f, indent=1, ensure_ascii=False)
+        f.write("\n")
+
+    return True
+
+
+if __name__ == "__main__":
+    # Run the stripper over every path given on the command line and remember
+    # which files it actually rewrote.
+    changed = []
+    for candidate in sys.argv[1:]:
+        if strip_notebook_progress_bars(candidate):
+            changed.append(candidate)
+
+    # Report the rewritten files and exit non-zero when anything changed
+    # (presumably so the calling pre-commit hook run is marked as failed).
+    if changed:
+        print("Stripped tqdm progress bars from:", changed)
+        sys.exit(1)
diff --git a/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb b/doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb
@@ -202,7 +202,6 @@
       "Example: azcopy copy './git/PyRIT' 'https://romanlutz0437468309.blob.core.windows.net/3f52e8b9-0bac-4c48-9e4a-a92e85a582c4-10s61nn9uso4b2p89xjypawyc7/PyRIT' \n",
       "\n",
       "See https://learn.microsoft.com/azure/storage/common/storage-use-azcopy-v10 for more information.\n",
-      "\u001b[32mUploading PyRIT (194.65 MBs): 100%|##########| 194652493/194652493 [01:19<00:00, 2447407.71it/s] \n",
       "\u001b[39m\n",
       "\n"
      ]

diff --git a/doc/code/front_end/1_pyrit_scan.ipynb b/doc/code/front_end/1_pyrit_scan.ipynb
@@ -545,16 +545,7 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "Loading datasets - this can take a few minutes:   0%|          | 0/58 [00:00<?, ?dataset/s]\n",
-      "Loading datasets - this can take a few minutes:   2%|▏         | 1/58 [00:00<00:33,  1.72dataset/s]\n",
-      "Loading datasets - this can take a few minutes:  29%|██▉       | 17/58 [00:00<00:01, 32.77dataset/s]\n",
-      "Loading datasets - this can take a few minutes:  60%|██████    | 35/58 [00:00<00:00, 52.67dataset/s]\n",
-      "Loading datasets - this can take a few minutes: 100%|██████████| 58/58 [00:00<00:00, 65.13dataset/s]\n",
       "\n",
-      "Executing RedTeamAgent:   0%|          | 0/2 [00:00<?, ?attack/s]\n",
-      "Executing RedTeamAgent:  50%|█████     | 1/2 [00:07<00:07,  7.14s/attack]\n",
-      "Executing RedTeamAgent: 100%|██████████| 2/2 [00:15<00:00,  7.67s/attack]\n",
-      "Executing RedTeamAgent: 100%|██████████| 2/2 [00:15<00:00,  7.59s/attack]\n",
       "ERROR:asyncio:Unclosed client session\n",
       "client_session: <aiohttp.client.ClientSession object at 0x000001A51ECDEB50>\n"
      ]

diff --git a/doc/code/memory/8_seed_database.ipynb b/doc/code/memory/8_seed_database.ipynb
@@ -39,38 +39,6 @@
    "id": "2",
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:   0%|                                                                                                                    | 0/41 [00:00<?, ?dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:   2%|██▋                                                                                                         | 1/41 [00:00<00:20,  1.91dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  49%|████████████████████████████████████████████████████▏                                                      | 20/41 [00:00<00:00, 39.77dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 41/41 [00:00<00:00, 61.96dataset/s]"
-     ]
-    },
     {
      "name": "stderr",
      "output_type": "stream",

diff --git a/doc/code/registry/1_class_registry.ipynb b/doc/code/registry/1_class_registry.ipynb
@@ -102,13 +102,6 @@
       "Loaded environment file: ./.pyrit/.env\n"
      ]
     },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Loading datasets - this can take a few minutes: 100%|██████████| 58/58 [00:00<00:00, 68.45dataset/s]\n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",

diff --git a/doc/code/scenarios/1_red_team_agent.ipynb b/doc/code/scenarios/1_red_team_agent.ipynb
@@ -78,86 +78,6 @@
     }
    },
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:   0%|          | 0/58 [00:00<?, ?dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:   2%|▏         | 1/58 [00:00<00:14,  4.02dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:   9%|▊         | 5/58 [00:00<00:03, 16.33dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  16%|█▌        | 9/58 [00:00<00:02, 22.26dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  22%|██▏       | 13/58 [00:00<00:01, 26.04dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  29%|██▉       | 17/58 [00:00<00:01, 28.43dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  36%|███▌      | 21/58 [00:00<00:01, 30.13dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  43%|████▎     | 25/58 [00:00<00:01, 29.32dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  60%|██████    | 35/58 [00:01<00:00, 47.39dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes: 100%|██████████| 58/58 [00:01<00:00, 52.93dataset/s]"
-     ]
-    },
     {
      "name": "stderr",
      "output_type": "stream",

diff --git a/doc/code/scenarios/2_content_harms.ipynb b/doc/code/scenarios/2_content_harms.ipynb
@@ -60,102 +60,6 @@
       "Loaded environment file: ./.pyrit/.env.local\n"
      ]
     },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:   0%|          | 0/58 [00:00<?, ?dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:   2%|▏         | 1/58 [00:00<00:14,  4.02dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:   7%|▋         | 4/58 [00:00<00:04, 12.77dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  10%|█         | 6/58 [00:00<00:03, 14.46dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  16%|█▌        | 9/58 [00:00<00:02, 16.57dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  19%|█▉        | 11/58 [00:00<00:02, 17.10dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  22%|██▏       | 13/58 [00:00<00:02, 17.29dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  26%|██▌       | 15/58 [00:00<00:02, 17.46dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  33%|███▎      | 19/58 [00:01<00:01, 22.47dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  40%|███▉      | 23/58 [00:01<00:01, 25.56dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes:  45%|████▍     | 26/58 [00:01<00:01, 23.97dataset/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Loading datasets - this can take a few minutes: 100%|██████████| 58/58 [00:01<00:00, 40.47dataset/s]"
-     ]
-    },
     {
      "name": "stderr",
      "output_type": "stream",