From 0114b779decfaadf0888c002eae1a19334258dc6 Mon Sep 17 00:00:00 2001 From: DoubleMathew Date: Tue, 18 Nov 2025 13:55:52 -0600 Subject: [PATCH] update grpo notebooks to have per_device_train_batch_size=num_generations --- nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb | 10 +- nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb | 2 +- nb/Gemma3_(1B)-GRPO.ipynb | 169 ++++++++---------- nb/Gemma3_(4B)-Vision-GRPO.ipynb | 4 +- ...rse-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb | 2 +- ...rse-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb | 10 +- ...rse-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb | 2 +- nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb | 169 ++++++++---------- ...gFace Course-Gemma3_(4B)-Vision-GRPO.ipynb | 4 +- ...uggingFace Course-Llama3.1_(8B)-GRPO.ipynb | 4 +- ...ngFace Course-Mistral_v0.3_(7B)-GRPO.ipynb | 2 +- nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb | 2 +- nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb | 2 +- ...uggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb | 2 +- nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb | 4 +- ...ace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb | 4 +- ...uggingFace Course-gpt-oss-(20B)-GRPO.ipynb | 2 +- ...gFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb | 2 +- ...gle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb | 2 +- ...gle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb | 10 +- ...gle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb | 2 +- nb/Kaggle-Gemma3_(1B)-GRPO.ipynb | 154 +++++++--------- nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb | 4 +- nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb | 4 +- nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb | 2 +- nb/Kaggle-Phi_4_(14B)-GRPO.ipynb | 2 +- nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb | 2 +- nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb | 2 +- nb/Kaggle-Qwen3_(4B)-GRPO.ipynb | 4 +- nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb | 4 +- nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb | 2 +- nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb | 2 +- nb/Llama3.1_(8B)-GRPO.ipynb | 4 +- nb/Mistral_v0.3_(7B)-GRPO.ipynb | 2 +- nb/Phi_4_(14B)-GRPO.ipynb | 2 +- nb/Qwen2.5_(3B)-GRPO.ipynb | 2 +- nb/Qwen2_5_7B_VL_GRPO.ipynb | 2 +- nb/Qwen3_(4B)-GRPO.ipynb | 4 +- nb/gpt-oss-(20B)-GRPO.ipynb | 2 +- nb/gpt_oss_(20B)_GRPO_BF16.ipynb | 2 +- .../Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb | 2 +- .../Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb | 10 +- .../DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb | 2 +- original_template/Gemma3_(1B)-GRPO.ipynb | 4 +- .../Gemma3_(4B)-Vision-GRPO.ipynb | 4 +- original_template/Llama3.1_(8B)-GRPO.ipynb | 4 +- .../Mistral_v0.3_(7B)-GRPO.ipynb | 2 +- original_template/Phi_4_(14B)-GRPO.ipynb | 2 +- original_template/Qwen2.5_(3B)-GRPO.ipynb | 2 +- original_template/Qwen2_5_7B_VL_GRPO.ipynb | 2 +- original_template/Qwen3_(4B)-GRPO.ipynb | 4 +- .../Qwen3_VL_(8B)-Vision-GRPO.ipynb | 4 +- original_template/gpt-oss-(20B)-GRPO.ipynb | 2 +- .../gpt_oss_(20B)_GRPO_BF16.ipynb | 2 +- .../Advanced_Llama3_2_(3B)_GRPO_LoRA.py | 2 +- .../DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py | 2 +- python_scripts/Gemma3_(1B)-GRPO.py | 4 +- python_scripts/Gemma3_(4B)-Vision-GRPO.py | 4 +- ...Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py | 2 +- ...Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py | 2 +- ...Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py | 2 +- .../HuggingFace Course-Gemma3_(1B)-GRPO.py | 4 +- ...gingFace Course-Gemma3_(4B)-Vision-GRPO.py | 4 +- .../HuggingFace Course-Llama3.1_(8B)-GRPO.py | 4 +- ...ggingFace Course-Mistral_v0.3_(7B)-GRPO.py | 2 +- .../HuggingFace Course-Phi_4_(14B)-GRPO.py | 2 +- .../HuggingFace Course-Qwen2.5_(3B)-GRPO.py | 2 +- .../HuggingFace Course-Qwen2_5_7B_VL_GRPO.py | 2 +- .../HuggingFace Course-Qwen3_(4B)-GRPO.py | 4 +- ...ngFace 
Course-Qwen3_VL_(8B)-Vision-GRPO.py | 4 +- .../HuggingFace Course-gpt-oss-(20B)-GRPO.py | 2 +- ...gingFace Course-gpt_oss_(20B)_GRPO_BF16.py | 2 +- ...Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py | 2 +- ...Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py | 2 +- ...Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py | 2 +- python_scripts/Kaggle-Gemma3_(1B)-GRPO.py | 4 +- .../Kaggle-Gemma3_(4B)-Vision-GRPO.py | 4 +- python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py | 4 +- .../Kaggle-Mistral_v0.3_(7B)-GRPO.py | 2 +- python_scripts/Kaggle-Phi_4_(14B)-GRPO.py | 2 +- python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py | 2 +- python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py | 2 +- python_scripts/Kaggle-Qwen3_(4B)-GRPO.py | 4 +- .../Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py | 4 +- python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py | 2 +- .../Kaggle-gpt_oss_(20B)_GRPO_BF16.py | 2 +- python_scripts/Llama3.1_(8B)-GRPO.py | 4 +- python_scripts/Mistral_v0.3_(7B)-GRPO.py | 2 +- python_scripts/Phi_4_(14B)-GRPO.py | 2 +- python_scripts/Qwen2.5_(3B)-GRPO.py | 2 +- python_scripts/Qwen2_5_7B_VL_GRPO.py | 2 +- python_scripts/Qwen3_(4B)-GRPO.py | 4 +- python_scripts/gpt-oss-(20B)-GRPO.py | 2 +- python_scripts/gpt_oss_(20B)_GRPO_BF16.py | 2 +- 94 files changed, 346 insertions(+), 420 deletions(-) diff --git a/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb index 4e990e33..7cbaeea4 100644 --- a/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb +++ b/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb @@ -3,8 +3,8 @@ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" @@ -1257,7 +1257,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 4, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", @@ -12746,8 +12746,8 @@ "accelerator": "GPU", "colab": { "gpuType": "T4", - "provenance": [], - "include_colab_link": true + "include_colab_link": true, + "provenance": [] }, "kernelspec": { "display_name": "Python 3", diff --git a/nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb b/nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb index 30b813a1..66f059c7 100644 --- a/nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb +++ b/nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb @@ -1711,7 +1711,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Gemma3_(1B)-GRPO.ipynb b/nb/Gemma3_(1B)-GRPO.ipynb index 0405b1c4..e933b423 100644 --- a/nb/Gemma3_(1B)-GRPO.ipynb +++ b/nb/Gemma3_(1B)-GRPO.ipynb @@ -8,7 +8,7 @@ "
\n", "\n", "\n", - " Join Discord if you need help + ⭐ Star us on Github ⭐\n", + " Join Discord if you need help + \u2b50 Star us on Github \u2b50\n", "
\n", "\n", "To install Unsloth your local device, follow [our guide](https://docs.unsloth.ai/get-started/install-and-update). This notebook is licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n", @@ -51,42 +51,14 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "%%capture\n", - "import os\n", - "os.environ[\"UNSLOTH_VLLM_STANDBY\"] = \"1\" # [NEW] Extra 30% context lengths!\n", - "if \"COLAB_\" not in \"\".join(os.environ.keys()):\n", - " # If you're not in Colab, just use pip install or uv pip install\n", - " !pip install unsloth vllm\n", - "else:\n", - " pass # For Colab / Kaggle, we need extra instructions hidden below \\/" - ] + "source": "%%capture\nimport os\nos.environ[\"UNSLOTH_VLLM_STANDBY\"] = \"1\" # [NEW] Extra 30% context lengths!\nif \"COLAB_\" not in \"\".join(os.environ.keys()):\n # If you're not in Colab, just use pip install or uv pip install\n !pip install unsloth vllm\nelse:\n pass # For Colab / Kaggle, we need extra instructions hidden below \\/" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "#@title Colab Extra Install { display-mode: \"form\" }\n", - "%%capture\n", - "import os\n", - "!pip install --upgrade -qqq uv\n", - "if \"COLAB_\" not in \"\".join(os.environ.keys()):\n", - " # If you're not in Colab, just use pip install!\n", - " !pip install unsloth vllm\n", - "else:\n", - " try: import numpy, PIL; get_numpy = f\"numpy=={numpy.__version__}\"; get_pil = f\"pillow=={PIL.__version__}\"\n", - " except: get_numpy = \"numpy\"; get_pil = \"pillow\"\n", - " try: import subprocess; is_t4 = \"Tesla T4\" in str(subprocess.check_output([\"nvidia-smi\"]))\n", - " except: is_t4 = False\n", - " get_vllm, get_triton = (\"vllm==0.9.2\", \"triton==3.2.0\") if is_t4 else (\"vllm==0.10.2\", \"triton\")\n", - " !uv pip install -qqq --upgrade \\\n", - " unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers\n", - " !uv pip install -qqq {get_triton}\n", - "!uv pip install transformers==4.56.2\n", - "!uv pip install --no-deps trl==0.22.2" - ] + "source": "#@title Colab Extra Install { display-mode: \"form\" }\n%%capture\nimport os\n!pip install --upgrade -qqq uv\nif \"COLAB_\" not in \"\".join(os.environ.keys()):\n # If you're not in Colab, just use pip install!\n !pip install unsloth vllm\nelse:\n try: import numpy, PIL; get_numpy = f\"numpy=={numpy.__version__}\"; get_pil = f\"pillow=={PIL.__version__}\"\n except: get_numpy = \"numpy\"; get_pil = \"pillow\"\n try: import subprocess; is_t4 = \"Tesla T4\" in str(subprocess.check_output([\"nvidia-smi\"]))\n except: is_t4 = False\n get_vllm, get_triton = (\"vllm==0.9.2\", \"triton==3.2.0\") if is_t4 else (\"vllm==0.10.2\", \"triton\")\n !uv pip install -qqq --upgrade \\\n unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers\n !uv pip install -qqq {get_triton}\n!uv pip install transformers==4.56.2\n!uv pip install --no-deps trl==0.22.2" }, { "cell_type": "markdown", @@ -201,8 +173,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "πŸ¦₯ Unsloth Zoo will now patch everything to make training faster!\n", + "\ud83e\udda5 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", + "\ud83e\udda5 Unsloth Zoo will now patch everything to make training faster!\n", "INFO 03-19 15:51:40 [__init__.py:256] Automatically detected platform cuda.\n", "==((====))== Unsloth 2025.3.17: 
Fast Gemma3 patching. Transformers: 4.50.0.dev0. vLLM: 0.8.0.\n", " \\\\ /| Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.\n", @@ -1060,7 +1032,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1091,7 +1063,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_torch_fused\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", @@ -1395,15 +1367,15 @@ "Loan amount: $480,000\n", "Interest rate: We need to assume an interest rate for this problem. Let's assume an annual interest rate of 6% (this is a common rate).\n", "Loan term: 20 years, so 20 * 12 = 240 months\n", - "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n – 1]\n", + "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n \u2013 1]\n", "where M is the monthly payment, P is the loan amount, i is the monthly interest rate, and n is the number of months.\n", "\n", "Monthly interest rate (i) = Annual interest rate / 12 = 0.06 / 12 = 0.005\n", "Number of months (n) = 240\n", "\n", - "M = 480000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 – 1]\n", - "M = 480000 [ 0.005(1.005)^240 ] / [ (1.005)^240 – 1]\n", - "M = 480000 [ 0.005 * 3.310853] / [ 3.310853 – 1]\n", + "M = 480000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 \u2013 1]\n", + "M = 480000 [ 0.005(1.005)^240 ] / [ (1.005)^240 \u2013 1]\n", + "M = 480000 [ 0.005 * 3.310853] / [ 3.310853 \u2013 1]\n", "M = 480000 [ 0.016554265] / [2.310853]\n", "M = 480000 * 0.00703658\n", "M = $331.54\n", @@ -1412,15 +1384,15 @@ "Loan amount: $120,000\n", "Interest rate: We still assume an annual interest rate of 6% (this is a common rate).\n", "Loan term: 20 years, so 20 * 12 = 240 months\n", - "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n – 1]\n", + "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n \u2013 1]\n", "where M is the monthly payment, P is the loan amount, i is the monthly interest rate, and n is the number of months.\n", "\n", "Monthly interest rate (i) = Annual interest rate / 12 = 0.06 / 12 = 0.005\n", "Number of months (n) = 240\n", "\n", - "M = 120000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 – 1]\n", - "M = 120000 [ 0.005(1.005)^240 ] / [ (1.005)^240 – 1]\n", - "M = 120000 [ 0.005 * 3.310853 ] / [ 3.310853 – 1]\n", + "M = 120000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 \u2013 1]\n", + "M = 120000 [ 0.005(1.005)^240 ] / [ (1.005)^240 \u2013 1]\n", + "M = 120000 [ 0.005 * 3.310853 ] / [ 3.310853 \u2013 1]\n", "M = 120000 [ 0.016554265] / [2.310853]\n", "M = 1200 \n", "Extracted:\n", @@ -1985,10 +1957,9 @@ " \n", " \n", "\n", - " Join Discord if you need help + ⭐️ Star us on Github ⭐️\n", + " Join Discord if you need help + \u2b50\ufe0f Star us on Github \u2b50\ufe0f\n", "\n", - "\n", - " This notebook and all Unsloth notebooks are licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n" + "\n This notebook and all Unsloth notebooks are licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n" ] } ], @@ -2266,9 +2237,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_551b94fe4b3c4a4f8f3220d013a6d897", - "placeholder": "​", + 
"placeholder": "\u200b", "style": "IPY_MODEL_29d36346bc75470eacd30aebf1423e14", - "value": "tokenizer.json: 100%" + "value": "tokenizer.json:\u2007100%" } }, "0ef32700424c4799b8216de4ed8bbbb9": { @@ -2378,9 +2349,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_383d2f04eccd4e38ad791494a06423ed", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_36cabdee7cd645a58e1ffc794674a322", - "value": "special_tokens_map.json: 100%" + "value": "special_tokens_map.json:\u2007100%" } }, "1664b23faa1b4292bb5727bd525c45be": { @@ -2473,9 +2444,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_772360d1551141c5ab0a877ae4ed1c76", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_b0c51c819fff44c5a6e8e626fec9e937", - "value": "tokenizer_config.json: 100%" + "value": "tokenizer_config.json:\u2007100%" } }, "19731cf654e64eb3905f02f4ae277e8c": { @@ -2870,9 +2841,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6decae16dd5b404ba272d89df9c1372b", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_9151ae8e05634cf4a8ce42677f211bac", - "value": "Map: 100%" + "value": "Map:\u2007100%" } }, "340c07f1b7b14527a378880f9166ef55": { @@ -3028,9 +2999,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_671b4725f18a4c809b3c97a65ec9e405", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_faa61469d4bc46aebe74d5401d42c3ab", - "value": " 35.0/35.0 [00:00<00:00, 2.66kB/s]" + "value": "\u200735.0/35.0\u2007[00:00<00:00,\u20072.66kB/s]" } }, "3fabb349d1f943b09ed25784a0d0ab0a": { @@ -3353,9 +3324,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cf83a6e558f64ff98276981a82f3b2ac", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_0ef32700424c4799b8216de4ed8bbbb9", - "value": "test-00000-of-00001.parquet: 100%" + "value": "test-00000-of-00001.parquet:\u2007100%" } }, "50503005b58845b79254f89f95fb03d9": { @@ -3398,9 +3369,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d1e38bbf7593462bb87b14bacee9e3d9", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_846c1dd9b3214c68bbfc362166a6b9ee", - "value": " 7473/7473 [00:00<00:00, 5203.68 examples/s]" + "value": "\u20077473/7473\u2007[00:00<00:00,\u20075203.68\u2007examples/s]" } }, "54c603df4e174b70bd03bce59de287b2": { @@ -3511,9 +3482,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9677dc2a2f4847d89d3ca42c01435205", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_645b707de96a4c2eb07eb118db311fb6", - "value": "Generating train split: 100%" + "value": "Generating\u2007train\u2007split:\u2007100%" } }, "61d7ca5d56f14d7d93d5cf5e5b712da2": { @@ -4060,9 +4031,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_669cb00554134e8287d0daae7372d397", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_9e7bb56731134d9cba89cbde208358e0", - "value": "train-00000-of-00001.parquet: 100%" + "value": "train-00000-of-00001.parquet:\u2007100%" } }, "7c4cd321445b43bfa818b599d42c8cb7": { @@ -4081,9 +4052,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cec948b41ee348fcbdf2f5fa290ffe42", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_6d1b19657db94907b6fdf0a3b3a05e84", - "value": "README.md: 100%" + "value": "README.md:\u2007100%" } }, "7d4b49f0c54046a89039d25ee7c11f6f": { @@ -4258,9 +4229,9 @@ "description": "", "description_tooltip": 
null, "layout": "IPY_MODEL_1da84b32592d432fb1a57358ddcdceff", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_bbcd20b75ce445bea7be9597efa68d73", - "value": " 33.4M/33.4M [00:00<00:00, 144MB/s]" + "value": "\u200733.4M/33.4M\u2007[00:00<00:00,\u2007144MB/s]" } }, "8464d2f310b045808bcd7206aa7c8cc5": { @@ -4309,9 +4280,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_70a0b21c05e642e5891bb8b91cfb2217", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_444aa81458bf4790915c188069506864", - "value": "tokenizer.model: 100%" + "value": "tokenizer.model:\u2007100%" } }, "859b75a69281413383fc1c0946bf63d0": { @@ -4361,9 +4332,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_02d6b8e9a02b470a92f700d9e7fea5d4", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8464d2f310b045808bcd7206aa7c8cc5", - "value": "Generating test split: 100%" + "value": "Generating\u2007test\u2007split:\u2007100%" } }, "8a21cb24786049e2946c9a050a6673ab": { @@ -4434,9 +4405,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0c96fa0b7ed344f4be44632451b406d7", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_bf96f070c2f54468a674f11681fcb22d", - "value": " 2.31M/2.31M [00:00<00:00, 17.7MB/s]" + "value": "\u20072.31M/2.31M\u2007[00:00<00:00,\u200717.7MB/s]" } }, "8ca7d620dc5d415a83b50f134317d925": { @@ -4828,9 +4799,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_67eda18557dc461483821ffd44a25eb4", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_4cabdb178b97464e875d507dd1410bbf", - "value": " 7.94k/7.94k [00:00<00:00, 179kB/s]" + "value": "\u20077.94k/7.94k\u2007[00:00<00:00,\u2007179kB/s]" } }, "9e7bb56731134d9cba89cbde208358e0": { @@ -5050,9 +5021,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_def9dbef26334436a6b2298b5b153f47", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_eea838fbfd2c4e82b1769fde1036487e", - "value": " 4.69M/4.69M [00:00<00:00, 26.2MB/s]" + "value": "\u20074.69M/4.69M\u2007[00:00<00:00,\u200726.2MB/s]" } }, "a9dbcb0e164544ba8321a7916500009d": { @@ -5227,9 +5198,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_45aa2714bf0b44e9af7668c674f63863", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8ca7d620dc5d415a83b50f134317d925", - "value": " 215/215 [00:00<00:00, 13.4kB/s]" + "value": "\u2007215/215\u2007[00:00<00:00,\u200713.4kB/s]" } }, "b0c51c819fff44c5a6e8e626fec9e937": { @@ -5263,9 +5234,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d9e72108a08b447ea2a29932391fe429", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_21366789f69d431bad2ac2c8d1ec1565", - "value": " 2.00G/2.00G [00:15<00:00, 71.2MB/s]" + "value": "\u20072.00G/2.00G\u2007[00:15<00:00,\u200771.2MB/s]" } }, "b67486cb78274f2baa4f9afdc2fd7e3c": { @@ -5284,9 +5255,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_21f2aec6d7af4c3a82306ad235674829", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_fd6ff32a3aa7479e8b5d0cda47819742", - "value": "model.safetensors: 100%" + "value": "model.safetensors:\u2007100%" } }, "ba4ac2596dda42698ae048f9b7a11c61": { @@ -5865,9 +5836,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8029855dfa614963a7f5ff11d48dcbdb", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_e74fcfb683074f27806b6aaae329b74d", - 
"value": " 670/670 [00:00<00:00, 74.3kB/s]" + "value": "\u2007670/670\u2007[00:00<00:00,\u200774.3kB/s]" } }, "d7b48dafe15947c9b225681c4a326581": { @@ -5975,9 +5946,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a9dbcb0e164544ba8321a7916500009d", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_a6030d7b5bbd460fb5aa1356c607ec82", - "value": " 419k/419k [00:00<00:00, 9.62MB/s]" + "value": "\u2007419k/419k\u2007[00:00<00:00,\u20079.62MB/s]" } }, "def9dbef26334436a6b2298b5b153f47": { @@ -6115,9 +6086,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_25c7874a020344e7aad2869f7a27db4e", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_86e9660b2d72460c836d9bba348be56b", - "value": " 1319/1319 [00:00<00:00, 12220.76 examples/s]" + "value": "\u20071319/1319\u2007[00:00<00:00,\u200712220.76\u2007examples/s]" } }, "ed53e921682348b28e2be40eaf96cbf7": { @@ -6160,9 +6131,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_185b378797334c819f8a199760cac945", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_a1fced067e1e40cbb08e25c9377123e0", - "value": "generation_config.json: 100%" + "value": "generation_config.json:\u2007100%" } }, "eea838fbfd2c4e82b1769fde1036487e": { @@ -6196,9 +6167,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_45eda31f22294728bbca360abad799e6", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_d7b48dafe15947c9b225681c4a326581", - "value": " 1.16M/1.16M [00:00<00:00, 6.41MB/s]" + "value": "\u20071.16M/1.16M\u2007[00:00<00:00,\u20076.41MB/s]" } }, "efaa563da24149aaaa153b7e8c473394": { @@ -6376,9 +6347,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_aaf739645f654cd6a94d897358b2c2e0", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_f0c1afb62d8a4616a8df27c9de866916", - "value": "added_tokens.json: 100%" + "value": "added_tokens.json:\u2007100%" } }, "f6a520e1570d4f7faffc1b6e2c6200d2": { @@ -6397,9 +6368,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_10c20527dc19466eb4d6eb325529a0df", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8f2974213b954318902d39a1fda9b3fb", - "value": " 7473/7473 [00:00<00:00, 9263.20 examples/s]" + "value": "\u20077473/7473\u2007[00:00<00:00,\u20079263.20\u2007examples/s]" } }, "f73516b2403f411d925618cfa2af5f46": { @@ -6462,4 +6433,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file diff --git a/nb/Gemma3_(4B)-Vision-GRPO.ipynb b/nb/Gemma3_(4B)-Vision-GRPO.ipynb index 7f1b8d60..ece4d662 100644 --- a/nb/Gemma3_(4B)-Vision-GRPO.ipynb +++ b/nb/Gemma3_(4B)-Vision-GRPO.ipynb @@ -1114,7 +1114,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1144,7 +1144,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb b/nb/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb index 1db244ca..46b67f4c 100644 --- a/nb/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb +++ b/nb/HuggingFace 
Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb @@ -819,7 +819,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_torch_fused\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 4, # Increase to 4 for smoother training\n", " num_generations = 8, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb index 4e990e33..7cbaeea4 100644 --- a/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb +++ b/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb @@ -3,8 +3,8 @@ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" @@ -1257,7 +1257,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 4, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", @@ -12746,8 +12746,8 @@ "accelerator": "GPU", "colab": { "gpuType": "T4", - "provenance": [], - "include_colab_link": true + "include_colab_link": true, + "provenance": [] }, "kernelspec": { "display_name": "Python 3", diff --git a/nb/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb b/nb/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb index 87101e65..aef03031 100644 --- a/nb/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb +++ b/nb/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb @@ -1713,7 +1713,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb b/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb index 7ea19cff..a8f7474e 100644 --- a/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb @@ -8,7 +8,7 @@ "
\n", "\n", "\n", - " Join Discord if you need help + ⭐ Star us on Github ⭐\n", + " Join Discord if you need help + \u2b50 Star us on Github \u2b50\n", "
\n", "\n", "In this [Hugging Face](https://huggingface.co/learn/nlp-course/en/chapter12/6?fw=pt) and Unsloth notebook, you will learn to transform Gemma3 (1B) GRPO into a Reasoning model using GRPO.\n", @@ -53,42 +53,14 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "%%capture\n", - "import os\n", - "os.environ[\"UNSLOTH_VLLM_STANDBY\"] = \"1\" # [NEW] Extra 30% context lengths!\n", - "if \"COLAB_\" not in \"\".join(os.environ.keys()):\n", - " # If you're not in Colab, just use pip install or uv pip install\n", - " !pip install unsloth vllm\n", - "else:\n", - " pass # For Colab / Kaggle, we need extra instructions hidden below \\/" - ] + "source": "%%capture\nimport os\nos.environ[\"UNSLOTH_VLLM_STANDBY\"] = \"1\" # [NEW] Extra 30% context lengths!\nif \"COLAB_\" not in \"\".join(os.environ.keys()):\n # If you're not in Colab, just use pip install or uv pip install\n !pip install unsloth vllm\nelse:\n pass # For Colab / Kaggle, we need extra instructions hidden below \\/" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "#@title Colab Extra Install { display-mode: \"form\" }\n", - "%%capture\n", - "import os\n", - "!pip install --upgrade -qqq uv\n", - "if \"COLAB_\" not in \"\".join(os.environ.keys()):\n", - " # If you're not in Colab, just use pip install!\n", - " !pip install unsloth vllm\n", - "else:\n", - " try: import numpy, PIL; get_numpy = f\"numpy=={numpy.__version__}\"; get_pil = f\"pillow=={PIL.__version__}\"\n", - " except: get_numpy = \"numpy\"; get_pil = \"pillow\"\n", - " try: import subprocess; is_t4 = \"Tesla T4\" in str(subprocess.check_output([\"nvidia-smi\"]))\n", - " except: is_t4 = False\n", - " get_vllm, get_triton = (\"vllm==0.9.2\", \"triton==3.2.0\") if is_t4 else (\"vllm==0.10.2\", \"triton\")\n", - " !uv pip install -qqq --upgrade \\\n", - " unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers\n", - " !uv pip install -qqq {get_triton}\n", - "!uv pip install transformers==4.56.2\n", - "!uv pip install --no-deps trl==0.22.2" - ] + "source": "#@title Colab Extra Install { display-mode: \"form\" }\n%%capture\nimport os\n!pip install --upgrade -qqq uv\nif \"COLAB_\" not in \"\".join(os.environ.keys()):\n # If you're not in Colab, just use pip install!\n !pip install unsloth vllm\nelse:\n try: import numpy, PIL; get_numpy = f\"numpy=={numpy.__version__}\"; get_pil = f\"pillow=={PIL.__version__}\"\n except: get_numpy = \"numpy\"; get_pil = \"pillow\"\n try: import subprocess; is_t4 = \"Tesla T4\" in str(subprocess.check_output([\"nvidia-smi\"]))\n except: is_t4 = False\n get_vllm, get_triton = (\"vllm==0.9.2\", \"triton==3.2.0\") if is_t4 else (\"vllm==0.10.2\", \"triton\")\n !uv pip install -qqq --upgrade \\\n unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers\n !uv pip install -qqq {get_triton}\n!uv pip install transformers==4.56.2\n!uv pip install --no-deps trl==0.22.2" }, { "cell_type": "markdown", @@ -203,8 +175,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "πŸ¦₯ Unsloth Zoo will now patch everything to make training faster!\n", + "\ud83e\udda5 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", + "\ud83e\udda5 Unsloth Zoo will now patch everything to make training faster!\n", "INFO 03-19 15:51:40 [__init__.py:256] Automatically detected platform cuda.\n", "==((====))== Unsloth 2025.3.17: Fast Gemma3 patching. 
Transformers: 4.50.0.dev0. vLLM: 0.8.0.\n", " \\\\ /| Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.\n", @@ -1062,7 +1034,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1093,7 +1065,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_torch_fused\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", @@ -1397,15 +1369,15 @@ "Loan amount: $480,000\n", "Interest rate: We need to assume an interest rate for this problem. Let's assume an annual interest rate of 6% (this is a common rate).\n", "Loan term: 20 years, so 20 * 12 = 240 months\n", - "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n – 1]\n", + "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n \u2013 1]\n", "where M is the monthly payment, P is the loan amount, i is the monthly interest rate, and n is the number of months.\n", "\n", "Monthly interest rate (i) = Annual interest rate / 12 = 0.06 / 12 = 0.005\n", "Number of months (n) = 240\n", "\n", - "M = 480000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 – 1]\n", - "M = 480000 [ 0.005(1.005)^240 ] / [ (1.005)^240 – 1]\n", - "M = 480000 [ 0.005 * 3.310853] / [ 3.310853 – 1]\n", + "M = 480000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 \u2013 1]\n", + "M = 480000 [ 0.005(1.005)^240 ] / [ (1.005)^240 \u2013 1]\n", + "M = 480000 [ 0.005 * 3.310853] / [ 3.310853 \u2013 1]\n", "M = 480000 [ 0.016554265] / [2.310853]\n", "M = 480000 * 0.00703658\n", "M = $331.54\n", @@ -1414,15 +1386,15 @@ "Loan amount: $120,000\n", "Interest rate: We still assume an annual interest rate of 6% (this is a common rate).\n", "Loan term: 20 years, so 20 * 12 = 240 months\n", - "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n – 1]\n", + "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n \u2013 1]\n", "where M is the monthly payment, P is the loan amount, i is the monthly interest rate, and n is the number of months.\n", "\n", "Monthly interest rate (i) = Annual interest rate / 12 = 0.06 / 12 = 0.005\n", "Number of months (n) = 240\n", "\n", - "M = 120000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 – 1]\n", - "M = 120000 [ 0.005(1.005)^240 ] / [ (1.005)^240 – 1]\n", - "M = 120000 [ 0.005 * 3.310853 ] / [ 3.310853 – 1]\n", + "M = 120000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 \u2013 1]\n", + "M = 120000 [ 0.005(1.005)^240 ] / [ (1.005)^240 \u2013 1]\n", + "M = 120000 [ 0.005 * 3.310853 ] / [ 3.310853 \u2013 1]\n", "M = 120000 [ 0.016554265] / [2.310853]\n", "M = 1200 \n", "Extracted:\n", @@ -1987,10 +1959,9 @@ " \n", " \n", "\n", - " Join Discord if you need help + ⭐️ Star us on Github ⭐️\n", + " Join Discord if you need help + \u2b50\ufe0f Star us on Github \u2b50\ufe0f\n", "\n", - "\n", - " This notebook and all Unsloth notebooks are licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n" + "\n This notebook and all Unsloth notebooks are licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n" ] } ], @@ -2268,9 +2239,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_551b94fe4b3c4a4f8f3220d013a6d897", - "placeholder": "​", + "placeholder": "\u200b", "style": 
"IPY_MODEL_29d36346bc75470eacd30aebf1423e14", - "value": "tokenizer.json: 100%" + "value": "tokenizer.json:\u2007100%" } }, "0ef32700424c4799b8216de4ed8bbbb9": { @@ -2380,9 +2351,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_383d2f04eccd4e38ad791494a06423ed", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_36cabdee7cd645a58e1ffc794674a322", - "value": "special_tokens_map.json: 100%" + "value": "special_tokens_map.json:\u2007100%" } }, "1664b23faa1b4292bb5727bd525c45be": { @@ -2475,9 +2446,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_772360d1551141c5ab0a877ae4ed1c76", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_b0c51c819fff44c5a6e8e626fec9e937", - "value": "tokenizer_config.json: 100%" + "value": "tokenizer_config.json:\u2007100%" } }, "19731cf654e64eb3905f02f4ae277e8c": { @@ -2872,9 +2843,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6decae16dd5b404ba272d89df9c1372b", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_9151ae8e05634cf4a8ce42677f211bac", - "value": "Map: 100%" + "value": "Map:\u2007100%" } }, "340c07f1b7b14527a378880f9166ef55": { @@ -3030,9 +3001,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_671b4725f18a4c809b3c97a65ec9e405", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_faa61469d4bc46aebe74d5401d42c3ab", - "value": " 35.0/35.0 [00:00<00:00, 2.66kB/s]" + "value": "\u200735.0/35.0\u2007[00:00<00:00,\u20072.66kB/s]" } }, "3fabb349d1f943b09ed25784a0d0ab0a": { @@ -3355,9 +3326,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cf83a6e558f64ff98276981a82f3b2ac", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_0ef32700424c4799b8216de4ed8bbbb9", - "value": "test-00000-of-00001.parquet: 100%" + "value": "test-00000-of-00001.parquet:\u2007100%" } }, "50503005b58845b79254f89f95fb03d9": { @@ -3400,9 +3371,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d1e38bbf7593462bb87b14bacee9e3d9", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_846c1dd9b3214c68bbfc362166a6b9ee", - "value": " 7473/7473 [00:00<00:00, 5203.68 examples/s]" + "value": "\u20077473/7473\u2007[00:00<00:00,\u20075203.68\u2007examples/s]" } }, "54c603df4e174b70bd03bce59de287b2": { @@ -3513,9 +3484,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9677dc2a2f4847d89d3ca42c01435205", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_645b707de96a4c2eb07eb118db311fb6", - "value": "Generating train split: 100%" + "value": "Generating\u2007train\u2007split:\u2007100%" } }, "61d7ca5d56f14d7d93d5cf5e5b712da2": { @@ -4062,9 +4033,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_669cb00554134e8287d0daae7372d397", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_9e7bb56731134d9cba89cbde208358e0", - "value": "train-00000-of-00001.parquet: 100%" + "value": "train-00000-of-00001.parquet:\u2007100%" } }, "7c4cd321445b43bfa818b599d42c8cb7": { @@ -4083,9 +4054,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cec948b41ee348fcbdf2f5fa290ffe42", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_6d1b19657db94907b6fdf0a3b3a05e84", - "value": "README.md: 100%" + "value": "README.md:\u2007100%" } }, "7d4b49f0c54046a89039d25ee7c11f6f": { @@ -4260,9 +4231,9 @@ "description": "", "description_tooltip": null, "layout": 
"IPY_MODEL_1da84b32592d432fb1a57358ddcdceff", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_bbcd20b75ce445bea7be9597efa68d73", - "value": " 33.4M/33.4M [00:00<00:00, 144MB/s]" + "value": "\u200733.4M/33.4M\u2007[00:00<00:00,\u2007144MB/s]" } }, "8464d2f310b045808bcd7206aa7c8cc5": { @@ -4311,9 +4282,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_70a0b21c05e642e5891bb8b91cfb2217", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_444aa81458bf4790915c188069506864", - "value": "tokenizer.model: 100%" + "value": "tokenizer.model:\u2007100%" } }, "859b75a69281413383fc1c0946bf63d0": { @@ -4363,9 +4334,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_02d6b8e9a02b470a92f700d9e7fea5d4", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8464d2f310b045808bcd7206aa7c8cc5", - "value": "Generating test split: 100%" + "value": "Generating\u2007test\u2007split:\u2007100%" } }, "8a21cb24786049e2946c9a050a6673ab": { @@ -4436,9 +4407,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0c96fa0b7ed344f4be44632451b406d7", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_bf96f070c2f54468a674f11681fcb22d", - "value": " 2.31M/2.31M [00:00<00:00, 17.7MB/s]" + "value": "\u20072.31M/2.31M\u2007[00:00<00:00,\u200717.7MB/s]" } }, "8ca7d620dc5d415a83b50f134317d925": { @@ -4830,9 +4801,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_67eda18557dc461483821ffd44a25eb4", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_4cabdb178b97464e875d507dd1410bbf", - "value": " 7.94k/7.94k [00:00<00:00, 179kB/s]" + "value": "\u20077.94k/7.94k\u2007[00:00<00:00,\u2007179kB/s]" } }, "9e7bb56731134d9cba89cbde208358e0": { @@ -5052,9 +5023,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_def9dbef26334436a6b2298b5b153f47", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_eea838fbfd2c4e82b1769fde1036487e", - "value": " 4.69M/4.69M [00:00<00:00, 26.2MB/s]" + "value": "\u20074.69M/4.69M\u2007[00:00<00:00,\u200726.2MB/s]" } }, "a9dbcb0e164544ba8321a7916500009d": { @@ -5229,9 +5200,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_45aa2714bf0b44e9af7668c674f63863", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8ca7d620dc5d415a83b50f134317d925", - "value": " 215/215 [00:00<00:00, 13.4kB/s]" + "value": "\u2007215/215\u2007[00:00<00:00,\u200713.4kB/s]" } }, "b0c51c819fff44c5a6e8e626fec9e937": { @@ -5265,9 +5236,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d9e72108a08b447ea2a29932391fe429", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_21366789f69d431bad2ac2c8d1ec1565", - "value": " 2.00G/2.00G [00:15<00:00, 71.2MB/s]" + "value": "\u20072.00G/2.00G\u2007[00:15<00:00,\u200771.2MB/s]" } }, "b67486cb78274f2baa4f9afdc2fd7e3c": { @@ -5286,9 +5257,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_21f2aec6d7af4c3a82306ad235674829", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_fd6ff32a3aa7479e8b5d0cda47819742", - "value": "model.safetensors: 100%" + "value": "model.safetensors:\u2007100%" } }, "ba4ac2596dda42698ae048f9b7a11c61": { @@ -5867,9 +5838,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8029855dfa614963a7f5ff11d48dcbdb", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_e74fcfb683074f27806b6aaae329b74d", - "value": " 
670/670 [00:00<00:00, 74.3kB/s]" + "value": "\u2007670/670\u2007[00:00<00:00,\u200774.3kB/s]" } }, "d7b48dafe15947c9b225681c4a326581": { @@ -5977,9 +5948,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a9dbcb0e164544ba8321a7916500009d", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_a6030d7b5bbd460fb5aa1356c607ec82", - "value": " 419k/419k [00:00<00:00, 9.62MB/s]" + "value": "\u2007419k/419k\u2007[00:00<00:00,\u20079.62MB/s]" } }, "def9dbef26334436a6b2298b5b153f47": { @@ -6117,9 +6088,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_25c7874a020344e7aad2869f7a27db4e", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_86e9660b2d72460c836d9bba348be56b", - "value": " 1319/1319 [00:00<00:00, 12220.76 examples/s]" + "value": "\u20071319/1319\u2007[00:00<00:00,\u200712220.76\u2007examples/s]" } }, "ed53e921682348b28e2be40eaf96cbf7": { @@ -6162,9 +6133,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_185b378797334c819f8a199760cac945", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_a1fced067e1e40cbb08e25c9377123e0", - "value": "generation_config.json: 100%" + "value": "generation_config.json:\u2007100%" } }, "eea838fbfd2c4e82b1769fde1036487e": { @@ -6198,9 +6169,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_45eda31f22294728bbca360abad799e6", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_d7b48dafe15947c9b225681c4a326581", - "value": " 1.16M/1.16M [00:00<00:00, 6.41MB/s]" + "value": "\u20071.16M/1.16M\u2007[00:00<00:00,\u20076.41MB/s]" } }, "efaa563da24149aaaa153b7e8c473394": { @@ -6378,9 +6349,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_aaf739645f654cd6a94d897358b2c2e0", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_f0c1afb62d8a4616a8df27c9de866916", - "value": "added_tokens.json: 100%" + "value": "added_tokens.json:\u2007100%" } }, "f6a520e1570d4f7faffc1b6e2c6200d2": { @@ -6399,9 +6370,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_10c20527dc19466eb4d6eb325529a0df", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8f2974213b954318902d39a1fda9b3fb", - "value": " 7473/7473 [00:00<00:00, 9263.20 examples/s]" + "value": "\u20077473/7473\u2007[00:00<00:00,\u20079263.20\u2007examples/s]" } }, "f73516b2403f411d925618cfa2af5f46": { @@ -6464,4 +6435,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file diff --git a/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb b/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb index 0993902d..b4dcef32 100644 --- a/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb +++ b/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb @@ -1116,7 +1116,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1146,7 +1146,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb b/nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb index 49fdfd4c..abb1f8e1 100644 --- a/nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb +++ b/nb/HuggingFace 
Course-Llama3.1_(8B)-GRPO.ipynb @@ -739,7 +739,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -770,7 +770,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb b/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb index cea2ec93..02489321 100644 --- a/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb @@ -614,7 +614,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb b/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb index cc4f3d6c..41da309c 100644 --- a/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb @@ -392,7 +392,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb b/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb index be8cb875..7c98ce5e 100644 --- a/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb @@ -909,7 +909,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 8, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb b/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb index 304290c2..6fc7c400 100644 --- a/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb +++ b/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb @@ -1376,7 +1376,7 @@ " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", " log_completions = False,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb b/nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb index dc5679ca..1ae0f2a5 100644 --- a/nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb @@ -2614,7 +2614,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -2656,7 +2656,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " 
per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb b/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb index 568593a8..b9e7caf1 100644 --- a/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb +++ b/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb @@ -1278,7 +1278,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1308,7 +1308,7 @@ " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", " log_completions = False,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/HuggingFace Course-gpt-oss-(20B)-GRPO.ipynb b/nb/HuggingFace Course-gpt-oss-(20B)-GRPO.ipynb index 7dad4d8a..3a5c7f1d 100644 --- a/nb/HuggingFace Course-gpt-oss-(20B)-GRPO.ipynb +++ b/nb/HuggingFace Course-gpt-oss-(20B)-GRPO.ipynb @@ -1684,7 +1684,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb b/nb/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb index 81a336fa..b48eb85c 100644 --- a/nb/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb +++ b/nb/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb @@ -1812,7 +1812,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb b/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb index 61be2340..6da4f1b2 100644 --- a/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb +++ b/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb @@ -808,7 +808,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_torch_fused\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 4, # Increase to 4 for smoother training\n", " num_generations = 8, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb index 4b0944aa..ec200d0b 100644 --- a/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb +++ b/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb @@ -3,8 +3,8 @@ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" @@ -1248,7 +1248,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " 
per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 4, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", @@ -12737,8 +12737,8 @@ "accelerator": "GPU", "colab": { "gpuType": "T4", - "provenance": [], - "include_colab_link": true + "include_colab_link": true, + "provenance": [] }, "kernelspec": { "display_name": "Python 3", diff --git a/nb/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb b/nb/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb index 6af2eedb..8afb6453 100644 --- a/nb/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb +++ b/nb/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb @@ -1704,7 +1704,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb b/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb index 9e9f25e9..72adabe9 100644 --- a/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb +++ b/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb @@ -8,7 +8,7 @@ "
\n", "\n", "\n", - " Join Discord if you need help + ⭐ Star us on Github ⭐\n", + " Join Discord if you need help + \u2b50 Star us on Github \u2b50\n", "
\n", "\n", "To install Unsloth your local device, follow [our guide](https://docs.unsloth.ai/get-started/install-and-update). This notebook is licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n", @@ -51,22 +51,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "%%capture\n", - "import os\n", - "os.environ[\"UNSLOTH_VLLM_STANDBY\"] = \"1\" # [NEW] Extra 30% context lengths!\n", - "!pip install --upgrade -qqq uv\n", - "try: import numpy, PIL; get_numpy = f\"numpy=={numpy.__version__}\"; get_pil = f\"pillow=={PIL.__version__}\"\n", - "except: get_numpy = \"numpy\"; get_pil = \"pillow\"\n", - "try: import subprocess; is_t4 = \"Tesla T4\" in str(subprocess.check_output([\"nvidia-smi\"]))\n", - "except: is_t4 = False\n", - "get_vllm, get_triton = (\"vllm==0.9.2\", \"triton==3.2.0\") if is_t4 else (\"vllm==0.10.2\", \"triton\")\n", - "!uv pip install -qqq --upgrade unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers\n", - "!uv pip install -qqq {get_triton}\n", - "!uv pip install \"huggingface_hub>=0.34.0\" \"datasets>=3.4.1,<4.0.\n", - "!uv pip install transformers==4.56.2\n", - "!uv pip install --no-deps trl==0.22.2" - ] + "source": "%%capture\nimport os\nos.environ[\"UNSLOTH_VLLM_STANDBY\"] = \"1\" # [NEW] Extra 30% context lengths!\n!pip install --upgrade -qqq uv\ntry: import numpy, PIL; get_numpy = f\"numpy=={numpy.__version__}\"; get_pil = f\"pillow=={PIL.__version__}\"\nexcept: get_numpy = \"numpy\"; get_pil = \"pillow\"\ntry: import subprocess; is_t4 = \"Tesla T4\" in str(subprocess.check_output([\"nvidia-smi\"]))\nexcept: is_t4 = False\nget_vllm, get_triton = (\"vllm==0.9.2\", \"triton==3.2.0\") if is_t4 else (\"vllm==0.10.2\", \"triton\")\n!uv pip install -qqq --upgrade unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers\n!uv pip install -qqq {get_triton}\n!uv pip install \"huggingface_hub>=0.34.0\" \"datasets>=3.4.1,<4.0.\n!uv pip install transformers==4.56.2\n!uv pip install --no-deps trl==0.22.2" }, { "cell_type": "markdown", @@ -181,8 +166,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "πŸ¦₯ Unsloth Zoo will now patch everything to make training faster!\n", + "\ud83e\udda5 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", + "\ud83e\udda5 Unsloth Zoo will now patch everything to make training faster!\n", "INFO 03-19 15:51:40 [__init__.py:256] Automatically detected platform cuda.\n", "==((====))== Unsloth 2025.3.17: Fast Gemma3 patching. Transformers: 4.50.0.dev0. vLLM: 0.8.0.\n", " \\\\ /| Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.\n", @@ -1040,7 +1025,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1071,7 +1056,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_torch_fused\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", @@ -1375,15 +1360,15 @@ "Loan amount: $480,000\n", "Interest rate: We need to assume an interest rate for this problem. 
Let's assume an annual interest rate of 6% (this is a common rate).\n", "Loan term: 20 years, so 20 * 12 = 240 months\n", - "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n – 1]\n", + "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n \u2013 1]\n", "where M is the monthly payment, P is the loan amount, i is the monthly interest rate, and n is the number of months.\n", "\n", "Monthly interest rate (i) = Annual interest rate / 12 = 0.06 / 12 = 0.005\n", "Number of months (n) = 240\n", "\n", - "M = 480000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 – 1]\n", - "M = 480000 [ 0.005(1.005)^240 ] / [ (1.005)^240 – 1]\n", - "M = 480000 [ 0.005 * 3.310853] / [ 3.310853 – 1]\n", + "M = 480000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 \u2013 1]\n", + "M = 480000 [ 0.005(1.005)^240 ] / [ (1.005)^240 \u2013 1]\n", + "M = 480000 [ 0.005 * 3.310853] / [ 3.310853 \u2013 1]\n", "M = 480000 [ 0.016554265] / [2.310853]\n", "M = 480000 * 0.00703658\n", "M = $331.54\n", @@ -1392,15 +1377,15 @@ "Loan amount: $120,000\n", "Interest rate: We still assume an annual interest rate of 6% (this is a common rate).\n", "Loan term: 20 years, so 20 * 12 = 240 months\n", - "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n – 1]\n", + "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n \u2013 1]\n", "where M is the monthly payment, P is the loan amount, i is the monthly interest rate, and n is the number of months.\n", "\n", "Monthly interest rate (i) = Annual interest rate / 12 = 0.06 / 12 = 0.005\n", "Number of months (n) = 240\n", "\n", - "M = 120000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 – 1]\n", - "M = 120000 [ 0.005(1.005)^240 ] / [ (1.005)^240 – 1]\n", - "M = 120000 [ 0.005 * 3.310853 ] / [ 3.310853 – 1]\n", + "M = 120000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 \u2013 1]\n", + "M = 120000 [ 0.005(1.005)^240 ] / [ (1.005)^240 \u2013 1]\n", + "M = 120000 [ 0.005 * 3.310853 ] / [ 3.310853 \u2013 1]\n", "M = 120000 [ 0.016554265] / [2.310853]\n", "M = 1200 \n", "Extracted:\n", @@ -1965,10 +1950,9 @@ " \n", " \n", "\n", - " Join Discord if you need help + ⭐️ Star us on Github ⭐️\n", + " Join Discord if you need help + \u2b50\ufe0f Star us on Github \u2b50\ufe0f\n", "\n", - "\n", - " This notebook and all Unsloth notebooks are licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n" + "\n This notebook and all Unsloth notebooks are licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n" ] } ], @@ -2246,9 +2230,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_551b94fe4b3c4a4f8f3220d013a6d897", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_29d36346bc75470eacd30aebf1423e14", - "value": "tokenizer.json: 100%" + "value": "tokenizer.json:\u2007100%" } }, "0ef32700424c4799b8216de4ed8bbbb9": { @@ -2358,9 +2342,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_383d2f04eccd4e38ad791494a06423ed", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_36cabdee7cd645a58e1ffc794674a322", - "value": "special_tokens_map.json: 100%" + "value": "special_tokens_map.json:\u2007100%" } }, "1664b23faa1b4292bb5727bd525c45be": { @@ -2453,9 +2437,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_772360d1551141c5ab0a877ae4ed1c76", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_b0c51c819fff44c5a6e8e626fec9e937", - "value": 
"tokenizer_config.json: 100%" + "value": "tokenizer_config.json:\u2007100%" } }, "19731cf654e64eb3905f02f4ae277e8c": { @@ -2850,9 +2834,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6decae16dd5b404ba272d89df9c1372b", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_9151ae8e05634cf4a8ce42677f211bac", - "value": "Map: 100%" + "value": "Map:\u2007100%" } }, "340c07f1b7b14527a378880f9166ef55": { @@ -3008,9 +2992,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_671b4725f18a4c809b3c97a65ec9e405", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_faa61469d4bc46aebe74d5401d42c3ab", - "value": " 35.0/35.0 [00:00<00:00, 2.66kB/s]" + "value": "\u200735.0/35.0\u2007[00:00<00:00,\u20072.66kB/s]" } }, "3fabb349d1f943b09ed25784a0d0ab0a": { @@ -3333,9 +3317,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cf83a6e558f64ff98276981a82f3b2ac", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_0ef32700424c4799b8216de4ed8bbbb9", - "value": "test-00000-of-00001.parquet: 100%" + "value": "test-00000-of-00001.parquet:\u2007100%" } }, "50503005b58845b79254f89f95fb03d9": { @@ -3378,9 +3362,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d1e38bbf7593462bb87b14bacee9e3d9", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_846c1dd9b3214c68bbfc362166a6b9ee", - "value": " 7473/7473 [00:00<00:00, 5203.68 examples/s]" + "value": "\u20077473/7473\u2007[00:00<00:00,\u20075203.68\u2007examples/s]" } }, "54c603df4e174b70bd03bce59de287b2": { @@ -3491,9 +3475,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9677dc2a2f4847d89d3ca42c01435205", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_645b707de96a4c2eb07eb118db311fb6", - "value": "Generating train split: 100%" + "value": "Generating\u2007train\u2007split:\u2007100%" } }, "61d7ca5d56f14d7d93d5cf5e5b712da2": { @@ -4040,9 +4024,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_669cb00554134e8287d0daae7372d397", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_9e7bb56731134d9cba89cbde208358e0", - "value": "train-00000-of-00001.parquet: 100%" + "value": "train-00000-of-00001.parquet:\u2007100%" } }, "7c4cd321445b43bfa818b599d42c8cb7": { @@ -4061,9 +4045,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cec948b41ee348fcbdf2f5fa290ffe42", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_6d1b19657db94907b6fdf0a3b3a05e84", - "value": "README.md: 100%" + "value": "README.md:\u2007100%" } }, "7d4b49f0c54046a89039d25ee7c11f6f": { @@ -4238,9 +4222,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1da84b32592d432fb1a57358ddcdceff", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_bbcd20b75ce445bea7be9597efa68d73", - "value": " 33.4M/33.4M [00:00<00:00, 144MB/s]" + "value": "\u200733.4M/33.4M\u2007[00:00<00:00,\u2007144MB/s]" } }, "8464d2f310b045808bcd7206aa7c8cc5": { @@ -4289,9 +4273,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_70a0b21c05e642e5891bb8b91cfb2217", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_444aa81458bf4790915c188069506864", - "value": "tokenizer.model: 100%" + "value": "tokenizer.model:\u2007100%" } }, "859b75a69281413383fc1c0946bf63d0": { @@ -4341,9 +4325,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_02d6b8e9a02b470a92f700d9e7fea5d4", - 
"placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8464d2f310b045808bcd7206aa7c8cc5", - "value": "Generating test split: 100%" + "value": "Generating\u2007test\u2007split:\u2007100%" } }, "8a21cb24786049e2946c9a050a6673ab": { @@ -4414,9 +4398,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0c96fa0b7ed344f4be44632451b406d7", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_bf96f070c2f54468a674f11681fcb22d", - "value": " 2.31M/2.31M [00:00<00:00, 17.7MB/s]" + "value": "\u20072.31M/2.31M\u2007[00:00<00:00,\u200717.7MB/s]" } }, "8ca7d620dc5d415a83b50f134317d925": { @@ -4808,9 +4792,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_67eda18557dc461483821ffd44a25eb4", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_4cabdb178b97464e875d507dd1410bbf", - "value": " 7.94k/7.94k [00:00<00:00, 179kB/s]" + "value": "\u20077.94k/7.94k\u2007[00:00<00:00,\u2007179kB/s]" } }, "9e7bb56731134d9cba89cbde208358e0": { @@ -5030,9 +5014,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_def9dbef26334436a6b2298b5b153f47", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_eea838fbfd2c4e82b1769fde1036487e", - "value": " 4.69M/4.69M [00:00<00:00, 26.2MB/s]" + "value": "\u20074.69M/4.69M\u2007[00:00<00:00,\u200726.2MB/s]" } }, "a9dbcb0e164544ba8321a7916500009d": { @@ -5207,9 +5191,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_45aa2714bf0b44e9af7668c674f63863", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8ca7d620dc5d415a83b50f134317d925", - "value": " 215/215 [00:00<00:00, 13.4kB/s]" + "value": "\u2007215/215\u2007[00:00<00:00,\u200713.4kB/s]" } }, "b0c51c819fff44c5a6e8e626fec9e937": { @@ -5243,9 +5227,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d9e72108a08b447ea2a29932391fe429", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_21366789f69d431bad2ac2c8d1ec1565", - "value": " 2.00G/2.00G [00:15<00:00, 71.2MB/s]" + "value": "\u20072.00G/2.00G\u2007[00:15<00:00,\u200771.2MB/s]" } }, "b67486cb78274f2baa4f9afdc2fd7e3c": { @@ -5264,9 +5248,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_21f2aec6d7af4c3a82306ad235674829", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_fd6ff32a3aa7479e8b5d0cda47819742", - "value": "model.safetensors: 100%" + "value": "model.safetensors:\u2007100%" } }, "ba4ac2596dda42698ae048f9b7a11c61": { @@ -5845,9 +5829,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8029855dfa614963a7f5ff11d48dcbdb", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_e74fcfb683074f27806b6aaae329b74d", - "value": " 670/670 [00:00<00:00, 74.3kB/s]" + "value": "\u2007670/670\u2007[00:00<00:00,\u200774.3kB/s]" } }, "d7b48dafe15947c9b225681c4a326581": { @@ -5955,9 +5939,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a9dbcb0e164544ba8321a7916500009d", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_a6030d7b5bbd460fb5aa1356c607ec82", - "value": " 419k/419k [00:00<00:00, 9.62MB/s]" + "value": "\u2007419k/419k\u2007[00:00<00:00,\u20079.62MB/s]" } }, "def9dbef26334436a6b2298b5b153f47": { @@ -6095,9 +6079,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_25c7874a020344e7aad2869f7a27db4e", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_86e9660b2d72460c836d9bba348be56b", - "value": " 1319/1319 [00:00<00:00, 
12220.76 examples/s]" + "value": "\u20071319/1319\u2007[00:00<00:00,\u200712220.76\u2007examples/s]" } }, "ed53e921682348b28e2be40eaf96cbf7": { @@ -6140,9 +6124,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_185b378797334c819f8a199760cac945", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_a1fced067e1e40cbb08e25c9377123e0", - "value": "generation_config.json: 100%" + "value": "generation_config.json:\u2007100%" } }, "eea838fbfd2c4e82b1769fde1036487e": { @@ -6176,9 +6160,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_45eda31f22294728bbca360abad799e6", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_d7b48dafe15947c9b225681c4a326581", - "value": " 1.16M/1.16M [00:00<00:00, 6.41MB/s]" + "value": "\u20071.16M/1.16M\u2007[00:00<00:00,\u20076.41MB/s]" } }, "efaa563da24149aaaa153b7e8c473394": { @@ -6356,9 +6340,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_aaf739645f654cd6a94d897358b2c2e0", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_f0c1afb62d8a4616a8df27c9de866916", - "value": "added_tokens.json: 100%" + "value": "added_tokens.json:\u2007100%" } }, "f6a520e1570d4f7faffc1b6e2c6200d2": { @@ -6377,9 +6361,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_10c20527dc19466eb4d6eb325529a0df", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8f2974213b954318902d39a1fda9b3fb", - "value": " 7473/7473 [00:00<00:00, 9263.20 examples/s]" + "value": "\u20077473/7473\u2007[00:00<00:00,\u20079263.20\u2007examples/s]" } }, "f73516b2403f411d925618cfa2af5f46": { @@ -6442,4 +6426,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file diff --git a/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb b/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb index 66a7912e..ae0dd088 100644 --- a/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb +++ b/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb @@ -1107,7 +1107,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1137,7 +1137,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb b/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb index 43327c1c..616f4587 100644 --- a/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb +++ b/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb @@ -730,7 +730,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -761,7 +761,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb b/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb index 5966dfa1..6f19ae5f 100644 --- a/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb +++ b/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb @@ -605,7 +605,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps 
= 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb b/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb index d3850047..7f7dfcdc 100644 --- a/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb +++ b/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb @@ -383,7 +383,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb b/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb index 81b4eb97..9a3dbc2e 100644 --- a/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb +++ b/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb @@ -900,7 +900,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 8, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb b/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb index cedb3bec..b96fca21 100644 --- a/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb +++ b/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb @@ -1367,7 +1367,7 @@ " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", " log_completions = False,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/Kaggle-Qwen3_(4B)-GRPO.ipynb b/nb/Kaggle-Qwen3_(4B)-GRPO.ipynb index 4b48294f..fa45a5e1 100644 --- a/nb/Kaggle-Qwen3_(4B)-GRPO.ipynb +++ b/nb/Kaggle-Qwen3_(4B)-GRPO.ipynb @@ -2605,7 +2605,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -2647,7 +2647,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb b/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb index 4576f4a3..3f63ddf9 100644 --- a/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb +++ b/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb @@ -1269,7 +1269,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1299,7 +1299,7 @@ " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", " log_completions = False,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb b/nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb index 61f365f2..389da9c2 100644 --- a/nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb 
+++ b/nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb @@ -1682,7 +1682,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb b/nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb index c4f6d29d..20a2202e 100644 --- a/nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb +++ b/nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb @@ -1810,7 +1810,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Llama3.1_(8B)-GRPO.ipynb b/nb/Llama3.1_(8B)-GRPO.ipynb index 7f3bd69e..207d094e 100644 --- a/nb/Llama3.1_(8B)-GRPO.ipynb +++ b/nb/Llama3.1_(8B)-GRPO.ipynb @@ -737,7 +737,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -768,7 +768,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Mistral_v0.3_(7B)-GRPO.ipynb b/nb/Mistral_v0.3_(7B)-GRPO.ipynb index 5ccdbe0c..72020966 100644 --- a/nb/Mistral_v0.3_(7B)-GRPO.ipynb +++ b/nb/Mistral_v0.3_(7B)-GRPO.ipynb @@ -612,7 +612,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Phi_4_(14B)-GRPO.ipynb b/nb/Phi_4_(14B)-GRPO.ipynb index 165b1051..5e8ba2d8 100644 --- a/nb/Phi_4_(14B)-GRPO.ipynb +++ b/nb/Phi_4_(14B)-GRPO.ipynb @@ -390,7 +390,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/nb/Qwen2.5_(3B)-GRPO.ipynb b/nb/Qwen2.5_(3B)-GRPO.ipynb index 0bb2bf48..432a36ae 100644 --- a/nb/Qwen2.5_(3B)-GRPO.ipynb +++ b/nb/Qwen2.5_(3B)-GRPO.ipynb @@ -907,7 +907,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 8, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/nb/Qwen2_5_7B_VL_GRPO.ipynb b/nb/Qwen2_5_7B_VL_GRPO.ipynb index 5fc1c362..458088b4 100644 --- a/nb/Qwen2_5_7B_VL_GRPO.ipynb +++ b/nb/Qwen2_5_7B_VL_GRPO.ipynb @@ -1374,7 +1374,7 @@ " optim = 
\"adamw_8bit\",\n", " logging_steps = 1,\n", " log_completions = False,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/Qwen3_(4B)-GRPO.ipynb b/nb/Qwen3_(4B)-GRPO.ipynb index 9de7e7ce..96c6a87b 100644 --- a/nb/Qwen3_(4B)-GRPO.ipynb +++ b/nb/Qwen3_(4B)-GRPO.ipynb @@ -2612,7 +2612,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -2654,7 +2654,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/gpt-oss-(20B)-GRPO.ipynb b/nb/gpt-oss-(20B)-GRPO.ipynb index 61f365f2..389da9c2 100644 --- a/nb/gpt-oss-(20B)-GRPO.ipynb +++ b/nb/gpt-oss-(20B)-GRPO.ipynb @@ -1682,7 +1682,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/gpt_oss_(20B)_GRPO_BF16.ipynb b/nb/gpt_oss_(20B)_GRPO_BF16.ipynb index c4f6d29d..20a2202e 100644 --- a/nb/gpt_oss_(20B)_GRPO_BF16.ipynb +++ b/nb/gpt_oss_(20B)_GRPO_BF16.ipynb @@ -1810,7 +1810,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb b/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb index 12f5a140..d7d195a0 100644 --- a/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb +++ b/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb @@ -796,7 +796,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_torch_fused\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 4, # Increase to 4 for smoother training\n", " num_generations = 8, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb index b5016295..c7d710a7 100644 --- a/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb +++ b/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb @@ -3,8 +3,8 @@ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" @@ -1278,7 +1278,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 4, # Increase to 4 for smoother training\n", " num_generations = 
4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", @@ -12766,8 +12766,8 @@ "accelerator": "GPU", "colab": { "gpuType": "T4", - "provenance": [], - "include_colab_link": true + "include_colab_link": true, + "provenance": [] }, "kernelspec": { "display_name": "Python 3", diff --git a/original_template/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb b/original_template/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb index 0f36aad2..4abe825b 100644 --- a/original_template/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb +++ b/original_template/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb @@ -1690,7 +1690,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/Gemma3_(1B)-GRPO.ipynb b/original_template/Gemma3_(1B)-GRPO.ipynb index ee0615ba..a891f962 100644 --- a/original_template/Gemma3_(1B)-GRPO.ipynb +++ b/original_template/Gemma3_(1B)-GRPO.ipynb @@ -1011,7 +1011,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1042,7 +1042,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_torch_fused\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/Gemma3_(4B)-Vision-GRPO.ipynb b/original_template/Gemma3_(4B)-Vision-GRPO.ipynb index 8cc14868..1b7170e5 100644 --- a/original_template/Gemma3_(4B)-Vision-GRPO.ipynb +++ b/original_template/Gemma3_(4B)-Vision-GRPO.ipynb @@ -1093,7 +1093,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1123,7 +1123,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/original_template/Llama3.1_(8B)-GRPO.ipynb b/original_template/Llama3.1_(8B)-GRPO.ipynb index 38b2317a..fdb6354c 100644 --- a/original_template/Llama3.1_(8B)-GRPO.ipynb +++ b/original_template/Llama3.1_(8B)-GRPO.ipynb @@ -716,7 +716,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -747,7 +747,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/Mistral_v0.3_(7B)-GRPO.ipynb b/original_template/Mistral_v0.3_(7B)-GRPO.ipynb index e8e8d23a..5d987984 100644 --- a/original_template/Mistral_v0.3_(7B)-GRPO.ipynb +++ b/original_template/Mistral_v0.3_(7B)-GRPO.ipynb @@ -591,7 +591,7 @@ " 
lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/Phi_4_(14B)-GRPO.ipynb b/original_template/Phi_4_(14B)-GRPO.ipynb index 0346fb2d..1ebf2f03 100644 --- a/original_template/Phi_4_(14B)-GRPO.ipynb +++ b/original_template/Phi_4_(14B)-GRPO.ipynb @@ -369,7 +369,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/original_template/Qwen2.5_(3B)-GRPO.ipynb b/original_template/Qwen2.5_(3B)-GRPO.ipynb index a1c58094..24615a43 100644 --- a/original_template/Qwen2.5_(3B)-GRPO.ipynb +++ b/original_template/Qwen2.5_(3B)-GRPO.ipynb @@ -886,7 +886,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 8, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/original_template/Qwen2_5_7B_VL_GRPO.ipynb b/original_template/Qwen2_5_7B_VL_GRPO.ipynb index 812e1d4d..de12d0c3 100644 --- a/original_template/Qwen2_5_7B_VL_GRPO.ipynb +++ b/original_template/Qwen2_5_7B_VL_GRPO.ipynb @@ -1353,7 +1353,7 @@ " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", " log_completions = False,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/original_template/Qwen3_(4B)-GRPO.ipynb b/original_template/Qwen3_(4B)-GRPO.ipynb index 2971f497..0b469467 100644 --- a/original_template/Qwen3_(4B)-GRPO.ipynb +++ b/original_template/Qwen3_(4B)-GRPO.ipynb @@ -2591,7 +2591,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -2633,7 +2633,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb b/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb index 67aac49d..036ca8a5 100644 --- a/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb +++ b/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb @@ -1255,7 +1255,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1285,7 +1285,7 @@ " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", " log_completions = False,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother 
training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/original_template/gpt-oss-(20B)-GRPO.ipynb b/original_template/gpt-oss-(20B)-GRPO.ipynb index 1ed932d1..6abe9a59 100644 --- a/original_template/gpt-oss-(20B)-GRPO.ipynb +++ b/original_template/gpt-oss-(20B)-GRPO.ipynb @@ -1659,7 +1659,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/gpt_oss_(20B)_GRPO_BF16.ipynb b/original_template/gpt_oss_(20B)_GRPO_BF16.ipynb index 226aeae0..b7da0f96 100644 --- a/original_template/gpt_oss_(20B)_GRPO_BF16.ipynb +++ b/original_template/gpt_oss_(20B)_GRPO_BF16.ipynb @@ -1787,7 +1787,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py b/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py index 0ee01230..371939f2 100644 --- a/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py +++ b/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py @@ -355,7 +355,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 4, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py b/python_scripts/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py index 658cf301..cd6e678d 100644 --- a/python_scripts/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py +++ b/python_scripts/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py @@ -488,7 +488,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Gemma3_(1B)-GRPO.py b/python_scripts/Gemma3_(1B)-GRPO.py index b67873dc..bfc7dab3 100644 --- a/python_scripts/Gemma3_(1B)-GRPO.py +++ b/python_scripts/Gemma3_(1B)-GRPO.py @@ -335,7 +335,7 @@ def check_numbers(prompts, completions, answer, **kwargs): # # Now set up GRPO Trainer and all configurations! 
-# In[18]: +# In[ ]: max_prompt_length = 256 @@ -350,7 +350,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_torch_fused", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Gemma3_(4B)-Vision-GRPO.py b/python_scripts/Gemma3_(4B)-Vision-GRPO.py index c4bb5e0e..3140acd5 100644 --- a/python_scripts/Gemma3_(4B)-Vision-GRPO.py +++ b/python_scripts/Gemma3_(4B)-Vision-GRPO.py @@ -297,7 +297,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # # Now set up GRPO Trainer and all configurations! -# In[14]: +# In[ ]: from trl import GRPOConfig, GRPOTrainer @@ -311,7 +311,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 2, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py b/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py index cae46edc..3f071048 100644 --- a/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py +++ b/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py @@ -354,7 +354,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_torch_fused", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 8, gradient_accumulation_steps = 4, # Increase to 4 for smoother training num_generations = 8, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py b/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py index 0ee01230..371939f2 100644 --- a/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py +++ b/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py @@ -355,7 +355,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 4, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py b/python_scripts/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py index c4f3ca82..9edaaac3 100644 --- a/python_scripts/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py +++ b/python_scripts/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py @@ -490,7 +490,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py b/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py index 
d99f852f..d319788c 100644 --- a/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py @@ -337,7 +337,7 @@ def check_numbers(prompts, completions, answer, **kwargs): # # Now set up GRPO Trainer and all configurations! -# In[18]: +# In[ ]: max_prompt_length = 256 @@ -352,7 +352,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_torch_fused", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py b/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py index fb3d2d2b..d692fae9 100644 --- a/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py +++ b/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py @@ -299,7 +299,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # # Now set up GRPO Trainer and all configurations! -# In[14]: +# In[ ]: from trl import GRPOConfig, GRPOTrainer @@ -313,7 +313,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 2, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py b/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py index 21ac7970..4ccf75a7 100644 --- a/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py @@ -199,7 +199,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # # Now set up GRPO Trainer and all configurations! 
-# In[4]: +# In[ ]: max_prompt_length = 256 @@ -214,7 +214,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py b/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py index f6636b0a..cdcec7a7 100644 --- a/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py @@ -214,7 +214,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py b/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py index 239c98a8..634ab004 100644 --- a/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py @@ -210,7 +210,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = 256, diff --git a/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py b/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py index 6a3dab51..d50aa768 100644 --- a/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py @@ -213,7 +213,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 8, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 8, # Decrease if out of memory max_prompt_length = 256, diff --git a/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py b/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py index 01409e4a..6cf223bc 100644 --- a/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py +++ b/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py @@ -326,7 +326,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa optim = "adamw_8bit", logging_steps = 1, log_completions = False, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 2, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/HuggingFace Course-Qwen3_(4B)-GRPO.py b/python_scripts/HuggingFace Course-Qwen3_(4B)-GRPO.py index 9be4441d..5cf68e51 100644 --- a/python_scripts/HuggingFace Course-Qwen3_(4B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Qwen3_(4B)-GRPO.py @@ -572,7 +572,7 @@ def check_numbers(prompts, completions, answer, **kwargs): # # Now set up GRPO Trainer and all configurations! 
-# In[30]: +# In[ ]: max_prompt_length = maximum_length + 1 # + 1 just in case! @@ -598,7 +598,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py b/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py index 9977919f..c9288042 100644 --- a/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py +++ b/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py @@ -320,7 +320,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # # Now set up the `GRPO` Trainer and all configurations! Note we actually enable `GSPO` as well! -# In[18]: +# In[ ]: from trl import GRPOConfig, GRPOTrainer @@ -334,7 +334,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa optim = "adamw_8bit", logging_steps = 1, log_completions = False, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/HuggingFace Course-gpt-oss-(20B)-GRPO.py b/python_scripts/HuggingFace Course-gpt-oss-(20B)-GRPO.py index 79ead469..0c62f87e 100644 --- a/python_scripts/HuggingFace Course-gpt-oss-(20B)-GRPO.py +++ b/python_scripts/HuggingFace Course-gpt-oss-(20B)-GRPO.py @@ -687,7 +687,7 @@ def speed_check(completions, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.py b/python_scripts/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.py index 28596c24..3a621c17 100644 --- a/python_scripts/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.py +++ b/python_scripts/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.py @@ -687,7 +687,7 @@ def speed_check(completions, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py b/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py index 979e2167..30ff6665 100644 --- a/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py +++ b/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py @@ -330,7 +330,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_torch_fused", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 8, gradient_accumulation_steps = 4, # Increase to 4 for smoother training num_generations = 8, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py b/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py index 813d9a00..58041e99 
100644 --- a/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py +++ b/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py @@ -333,7 +333,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 4, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py b/python_scripts/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py index b9c828a7..25b1e126 100644 --- a/python_scripts/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py +++ b/python_scripts/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py @@ -466,7 +466,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py b/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py index c27a4e67..f6d7e968 100644 --- a/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py +++ b/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py @@ -313,7 +313,7 @@ def check_numbers(prompts, completions, answer, **kwargs): # # Now set up GRPO Trainer and all configurations! -# In[18]: +# In[ ]: max_prompt_length = 256 @@ -328,7 +328,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_torch_fused", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py b/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py index aa8a42c5..84d6e61c 100644 --- a/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py +++ b/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py @@ -275,7 +275,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # # Now set up GRPO Trainer and all configurations! -# In[14]: +# In[ ]: from trl import GRPOConfig, GRPOTrainer @@ -289,7 +289,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 2, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py b/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py index e3de3741..4c876433 100644 --- a/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py +++ b/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py @@ -175,7 +175,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # # Now set up GRPO Trainer and all configurations! 
-# In[4]: +# In[ ]: max_prompt_length = 256 @@ -190,7 +190,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py b/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py index 09f746f0..d7043bbb 100644 --- a/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py +++ b/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py @@ -190,7 +190,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py b/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py index 964b7e22..11a4d3c3 100644 --- a/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py +++ b/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py @@ -186,7 +186,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = 256, diff --git a/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py b/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py index 201fb29e..17dbcc60 100644 --- a/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py +++ b/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py @@ -189,7 +189,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 8, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 8, # Decrease if out of memory max_prompt_length = 256, diff --git a/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py b/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py index 73870f7f..0808d099 100644 --- a/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py +++ b/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py @@ -302,7 +302,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa optim = "adamw_8bit", logging_steps = 1, log_completions = False, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 2, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/Kaggle-Qwen3_(4B)-GRPO.py b/python_scripts/Kaggle-Qwen3_(4B)-GRPO.py index 233619af..c1ae49d9 100644 --- a/python_scripts/Kaggle-Qwen3_(4B)-GRPO.py +++ b/python_scripts/Kaggle-Qwen3_(4B)-GRPO.py @@ -548,7 +548,7 @@ def check_numbers(prompts, completions, answer, **kwargs): # # Now set up GRPO Trainer and all configurations! -# In[30]: +# In[ ]: max_prompt_length = maximum_length + 1 # + 1 just in case! 
@@ -574,7 +574,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py b/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py index 4e33ce96..85f8ec7f 100644 --- a/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py +++ b/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py @@ -296,7 +296,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # # Now set up the `GRPO` Trainer and all configurations! Note we actually enable `GSPO` as well! -# In[18]: +# In[ ]: from trl import GRPOConfig, GRPOTrainer @@ -310,7 +310,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa optim = "adamw_8bit", logging_steps = 1, log_completions = False, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py b/python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py index 5d609c71..6100ebd0 100644 --- a/python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py +++ b/python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py @@ -685,7 +685,7 @@ def speed_check(completions, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-gpt_oss_(20B)_GRPO_BF16.py b/python_scripts/Kaggle-gpt_oss_(20B)_GRPO_BF16.py index 27c56aec..c970b081 100644 --- a/python_scripts/Kaggle-gpt_oss_(20B)_GRPO_BF16.py +++ b/python_scripts/Kaggle-gpt_oss_(20B)_GRPO_BF16.py @@ -685,7 +685,7 @@ def speed_check(completions, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Llama3.1_(8B)-GRPO.py b/python_scripts/Llama3.1_(8B)-GRPO.py index 02b47f29..8704f763 100644 --- a/python_scripts/Llama3.1_(8B)-GRPO.py +++ b/python_scripts/Llama3.1_(8B)-GRPO.py @@ -197,7 +197,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # # Now set up GRPO Trainer and all configurations! 
-# In[4]: +# In[ ]: max_prompt_length = 256 @@ -212,7 +212,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Mistral_v0.3_(7B)-GRPO.py b/python_scripts/Mistral_v0.3_(7B)-GRPO.py index b5630f32..28a6a5a7 100644 --- a/python_scripts/Mistral_v0.3_(7B)-GRPO.py +++ b/python_scripts/Mistral_v0.3_(7B)-GRPO.py @@ -212,7 +212,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Phi_4_(14B)-GRPO.py b/python_scripts/Phi_4_(14B)-GRPO.py index 605e0b5a..fbaa60ef 100644 --- a/python_scripts/Phi_4_(14B)-GRPO.py +++ b/python_scripts/Phi_4_(14B)-GRPO.py @@ -208,7 +208,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = 256, diff --git a/python_scripts/Qwen2.5_(3B)-GRPO.py b/python_scripts/Qwen2.5_(3B)-GRPO.py index b281f211..b68637ea 100644 --- a/python_scripts/Qwen2.5_(3B)-GRPO.py +++ b/python_scripts/Qwen2.5_(3B)-GRPO.py @@ -211,7 +211,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 8, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 8, # Decrease if out of memory max_prompt_length = 256, diff --git a/python_scripts/Qwen2_5_7B_VL_GRPO.py b/python_scripts/Qwen2_5_7B_VL_GRPO.py index bc38b951..84212ef8 100644 --- a/python_scripts/Qwen2_5_7B_VL_GRPO.py +++ b/python_scripts/Qwen2_5_7B_VL_GRPO.py @@ -324,7 +324,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa optim = "adamw_8bit", logging_steps = 1, log_completions = False, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 2, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/Qwen3_(4B)-GRPO.py b/python_scripts/Qwen3_(4B)-GRPO.py index 04a0e2a8..e53bbf7b 100644 --- a/python_scripts/Qwen3_(4B)-GRPO.py +++ b/python_scripts/Qwen3_(4B)-GRPO.py @@ -570,7 +570,7 @@ def check_numbers(prompts, completions, answer, **kwargs): # # Now set up GRPO Trainer and all configurations! -# In[30]: +# In[ ]: max_prompt_length = maximum_length + 1 # + 1 just in case! 
@@ -596,7 +596,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/gpt-oss-(20B)-GRPO.py b/python_scripts/gpt-oss-(20B)-GRPO.py index 5d609c71..6100ebd0 100644 --- a/python_scripts/gpt-oss-(20B)-GRPO.py +++ b/python_scripts/gpt-oss-(20B)-GRPO.py @@ -685,7 +685,7 @@ def speed_check(completions, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/gpt_oss_(20B)_GRPO_BF16.py b/python_scripts/gpt_oss_(20B)_GRPO_BF16.py index 27c56aec..c970b081 100644 --- a/python_scripts/gpt_oss_(20B)_GRPO_BF16.py +++ b/python_scripts/gpt_oss_(20B)_GRPO_BF16.py @@ -685,7 +685,7 @@ def speed_check(completions, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = max_prompt_length,
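--
Reviewer note on the recurring change in this patch. Every hunk raises
per_device_train_batch_size from 1 to the cell's num_generations. The motivation,
to my reading of recent TRL (these notebooks pin trl==0.22.2), is that GRPOTrainer
scores completions in groups of num_generations per prompt, and the batch those
completions are generated from (per_device_train_batch_size * num_processes *
steps_per_generation, where steps_per_generation defaults to
gradient_accumulation_steps) must split evenly into such groups. With a per-device
batch of 1 and no gradient accumulation, a group of 4 or 6 generations cannot fit
and the trainer rejects the config; setting the batch size equal to num_generations
is the smallest change that restores the invariant on a single GPU. Below is a
minimal sketch of that divisibility check; check_grpo_batching is a hypothetical
helper written for illustration, not a TRL API, and the exact rule should be
verified against the pinned trl release.

# A minimal sketch, assuming a single-GPU run and the TRL batching rule
# described above (an assumption, not a quote of TRL source).
def check_grpo_batching(per_device_train_batch_size, gradient_accumulation_steps,
                        num_generations, num_processes = 1):
    # Size of the batch completions are generated in, before being split into
    # per-prompt groups of `num_generations` for advantage normalization.
    generation_batch_size = (per_device_train_batch_size * num_processes
                             * gradient_accumulation_steps)
    if generation_batch_size % num_generations != 0:
        raise ValueError(f"{generation_batch_size} completions cannot be split "
                         f"into groups of num_generations = {num_generations}")

check_grpo_batching(4, 1, 4)   # new Gemma3 (1B) settings: one full group per step
check_grpo_batching(6, 1, 6)   # new Llama 3.1 (8B) settings: likewise fine
# check_grpo_batching(1, 1, 4) # old settings: raises ValueError (1 % 4 != 0)

Note the configs that already used gradient_accumulation_steps = 4 (the Advanced
Llama 3.2 LoRA notebooks) satisfied the divisibility rule even at batch size 1;
there the change simply makes each device process one whole generation group per
micro-step instead of spreading a group across accumulation steps.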