Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ repos:
entry: python ./build_scripts/sanitize_notebook_paths.py
language: python
files: ^doc.*\.(ipynb)$
- id: strip-notebook-progress-bars
name: Strip Notebook Progress Bars
entry: python ./build_scripts/strip_notebook_progress_bars.py
language: python
files: ^doc.*\.(ipynb)$
- id: validate-docs
name: Validate Documentation Structure
entry: python ./build_scripts/validate_docs.py
Expand Down
96 changes: 96 additions & 0 deletions build_scripts/strip_notebook_progress_bars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import json
import re
import sys

# tqdm text-mode progress bar patterns:
# - "%|" separates percentage from the bar
# - Block characters (━, █, ▏-▉) are used for the bar itself
# - "\r" carriage returns are used for in-place updates
_TQDM_PATTERNS = [
re.compile(r"%\|"), # " 0%|" or " 50%|..."
re.compile(r"[━█▏▎▍▌▋▊▉]"), # progress bar block characters
]


def _is_tqdm_line(line: str) -> bool:
"""
Check if a line is part of a tqdm progress bar output.

Args:
line (str): A single line of text from stderr output.

Returns:
bool: True if the line matches tqdm progress bar patterns.
"""
stripped = line.strip()
if not stripped or stripped == "\r":
# Bare carriage returns or blank lines between tqdm updates
return False
return any(pattern.search(line) for pattern in _TQDM_PATTERNS)


def strip_notebook_progress_bars(file_path: str) -> bool:
    """
    Remove tqdm progress bar outputs from notebook cell stderr streams.

    Only outputs with ``output_type == "stream"`` and ``name == "stderr"`` are
    inspected. For each such output:

    - If no line is a tqdm line, the output is left untouched.
    - If removing the tqdm lines leaves at least one non-whitespace line, the
      output is kept with the tqdm lines removed.
    - Otherwise (all tqdm, or only whitespace left) the whole output is dropped.

    The file is rewritten only when at least one tqdm line was removed.

    Args:
        file_path (str): Path to the .ipynb file. Paths that do not end in
            ".ipynb" are ignored.

    Returns:
        bool: True if the file was modified.
    """
    if not file_path.endswith(".ipynb"):
        return False

    with open(file_path, encoding="utf-8") as f:
        content = json.load(f)

    modified = False

    for cell in content.get("cells", []):
        outputs = cell.get("outputs", [])
        new_outputs = []

        for output in outputs:
            if output.get("output_type") != "stream" or output.get("name") != "stderr":
                new_outputs.append(output)
                continue

            raw_text = output.get("text", [])
            # The notebook format allows "text" to be either a list of lines or
            # one multi-line string. Iterating a string would yield single
            # characters and corrupt the output, so split it into lines first.
            # splitlines() also breaks on "\r", which separates in-place tqdm
            # updates from the text that precedes them.
            if isinstance(raw_text, str):
                text_lines = raw_text.splitlines(keepends=True)
            else:
                text_lines = raw_text

            non_tqdm_lines = [line for line in text_lines if not _is_tqdm_line(line)]

            if len(non_tqdm_lines) == len(text_lines):
                # Nothing to strip: keep the output exactly as it was stored.
                new_outputs.append(output)
                continue

            modified = True
            # Keep the output only if something meaningful survives; leftover
            # "\r" / "\n" fragments alone are not worth a stderr block.
            if any(line.strip() for line in non_tqdm_lines):
                output["text"] = non_tqdm_lines
                new_outputs.append(output)

        # Outputs edited in place are already reflected in the cell; the list
        # only needs replacing when whole outputs were dropped.
        if len(new_outputs) != len(outputs):
            cell["outputs"] = new_outputs

    if not modified:
        return False

    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(content, f, indent=1, ensure_ascii=False)
        f.write("\n")

    return True


if __name__ == "__main__":
    # Process every path given on the command line and remember which
    # notebooks were actually rewritten.
    changed = []
    for candidate in sys.argv[1:]:
        if strip_notebook_progress_bars(candidate):
            changed.append(candidate)

    # Report the rewritten files and exit non-zero so the caller can tell
    # that files were changed.
    if changed:
        print("Stripped tqdm progress bars from:", changed)
        sys.exit(1)
1 change: 0 additions & 1 deletion doc/code/auxiliary_attacks/1_gcg_azure_ml.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,6 @@
"Example: azcopy copy './git/PyRIT' 'https://romanlutz0437468309.blob.core.windows.net/3f52e8b9-0bac-4c48-9e4a-a92e85a582c4-10s61nn9uso4b2p89xjypawyc7/PyRIT' \n",
"\n",
"See https://learn.microsoft.com/azure/storage/common/storage-use-azcopy-v10 for more information.\n",
"\u001b[32mUploading PyRIT (194.65 MBs): 100%|##########| 194652493/194652493 [01:19<00:00, 2447407.71it/s] \n",
"\u001b[39m\n",
"\n"
]
Expand Down
9 changes: 0 additions & 9 deletions doc/code/front_end/1_pyrit_scan.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -545,16 +545,7 @@
"output_type": "stream",
"text": [
"\n",
"Loading datasets - this can take a few minutes: 0%| | 0/58 [00:00<?, ?dataset/s]\n",
"Loading datasets - this can take a few minutes: 2%|▏ | 1/58 [00:00<00:33, 1.72dataset/s]\n",
"Loading datasets - this can take a few minutes: 29%|██▉ | 17/58 [00:00<00:01, 32.77dataset/s]\n",
"Loading datasets - this can take a few minutes: 60%|██████ | 35/58 [00:00<00:00, 52.67dataset/s]\n",
"Loading datasets - this can take a few minutes: 100%|██████████| 58/58 [00:00<00:00, 65.13dataset/s]\n",
"\n",
"Executing RedTeamAgent: 0%| | 0/2 [00:00<?, ?attack/s]\n",
"Executing RedTeamAgent: 50%|█████ | 1/2 [00:07<00:07, 7.14s/attack]\n",
"Executing RedTeamAgent: 100%|██████████| 2/2 [00:15<00:00, 7.67s/attack]\n",
"Executing RedTeamAgent: 100%|██████████| 2/2 [00:15<00:00, 7.59s/attack]\n",
"ERROR:asyncio:Unclosed client session\n",
"client_session: <aiohttp.client.ClientSession object at 0x000001A51ECDEB50>\n"
]
Expand Down
32 changes: 0 additions & 32 deletions doc/code/memory/8_seed_database.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -39,38 +39,6 @@
"id": "2",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 0%| | 0/41 [00:00<?, ?dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 2%|██▋ | 1/41 [00:00<00:20, 1.91dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 49%|████████████████████████████████████████████████████▏ | 20/41 [00:00<00:00, 39.77dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 41/41 [00:00<00:00, 61.96dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
Expand Down
7 changes: 0 additions & 7 deletions doc/code/registry/1_class_registry.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,6 @@
"Loaded environment file: ./.pyrit/.env\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading datasets - this can take a few minutes: 100%|██████████| 58/58 [00:00<00:00, 68.45dataset/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
Expand Down
80 changes: 0 additions & 80 deletions doc/code/scenarios/1_red_team_agent.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -78,86 +78,6 @@
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 0%| | 0/58 [00:00<?, ?dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 2%|▏ | 1/58 [00:00<00:14, 4.02dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 9%|▊ | 5/58 [00:00<00:03, 16.33dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 16%|█▌ | 9/58 [00:00<00:02, 22.26dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 22%|██▏ | 13/58 [00:00<00:01, 26.04dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 29%|██▉ | 17/58 [00:00<00:01, 28.43dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 36%|███▌ | 21/58 [00:00<00:01, 30.13dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 43%|████▎ | 25/58 [00:00<00:01, 29.32dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 60%|██████ | 35/58 [00:01<00:00, 47.39dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 100%|██████████| 58/58 [00:01<00:00, 52.93dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
Expand Down
96 changes: 0 additions & 96 deletions doc/code/scenarios/2_content_harms.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -60,102 +60,6 @@
"Loaded environment file: ./.pyrit/.env.local\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 0%| | 0/58 [00:00<?, ?dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 2%|▏ | 1/58 [00:00<00:14, 4.02dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 7%|▋ | 4/58 [00:00<00:04, 12.77dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 10%|█ | 6/58 [00:00<00:03, 14.46dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 16%|█▌ | 9/58 [00:00<00:02, 16.57dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 19%|█▉ | 11/58 [00:00<00:02, 17.10dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 22%|██▏ | 13/58 [00:00<00:02, 17.29dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 26%|██▌ | 15/58 [00:00<00:02, 17.46dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 33%|███▎ | 19/58 [00:01<00:01, 22.47dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 40%|███▉ | 23/58 [00:01<00:01, 25.56dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 45%|████▍ | 26/58 [00:01<00:01, 23.97dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading datasets - this can take a few minutes: 100%|██████████| 58/58 [00:01<00:00, 40.47dataset/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
Expand Down
Loading
Loading