From 0114b779decfaadf0888c002eae1a19334258dc6 Mon Sep 17 00:00:00 2001 From: DoubleMathew Date: Tue, 18 Nov 2025 13:55:52 -0600 Subject: [PATCH] update grpo notebooks to have per_device_train_batch_size=num_generations --- nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb | 10 +- nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb | 2 +- nb/Gemma3_(1B)-GRPO.ipynb | 169 ++++++++---------- nb/Gemma3_(4B)-Vision-GRPO.ipynb | 4 +- ...rse-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb | 2 +- ...rse-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb | 10 +- ...rse-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb | 2 +- nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb | 169 ++++++++---------- ...gFace Course-Gemma3_(4B)-Vision-GRPO.ipynb | 4 +- ...uggingFace Course-Llama3.1_(8B)-GRPO.ipynb | 4 +- ...ngFace Course-Mistral_v0.3_(7B)-GRPO.ipynb | 2 +- nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb | 2 +- nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb | 2 +- ...uggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb | 2 +- nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb | 4 +- ...ace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb | 4 +- ...uggingFace Course-gpt-oss-(20B)-GRPO.ipynb | 2 +- ...gFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb | 2 +- ...gle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb | 2 +- ...gle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb | 10 +- ...gle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb | 2 +- nb/Kaggle-Gemma3_(1B)-GRPO.ipynb | 154 +++++++--------- nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb | 4 +- nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb | 4 +- nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb | 2 +- nb/Kaggle-Phi_4_(14B)-GRPO.ipynb | 2 +- nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb | 2 +- nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb | 2 +- nb/Kaggle-Qwen3_(4B)-GRPO.ipynb | 4 +- nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb | 4 +- nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb | 2 +- nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb | 2 +- nb/Llama3.1_(8B)-GRPO.ipynb | 4 +- nb/Mistral_v0.3_(7B)-GRPO.ipynb | 2 +- nb/Phi_4_(14B)-GRPO.ipynb | 2 +- nb/Qwen2.5_(3B)-GRPO.ipynb | 2 +- nb/Qwen2_5_7B_VL_GRPO.ipynb | 2 +- nb/Qwen3_(4B)-GRPO.ipynb | 4 +- nb/gpt-oss-(20B)-GRPO.ipynb | 2 +- nb/gpt_oss_(20B)_GRPO_BF16.ipynb | 2 +- .../Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb | 2 +- .../Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb | 10 +- .../DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb | 2 +- original_template/Gemma3_(1B)-GRPO.ipynb | 4 +- .../Gemma3_(4B)-Vision-GRPO.ipynb | 4 +- original_template/Llama3.1_(8B)-GRPO.ipynb | 4 +- .../Mistral_v0.3_(7B)-GRPO.ipynb | 2 +- original_template/Phi_4_(14B)-GRPO.ipynb | 2 +- original_template/Qwen2.5_(3B)-GRPO.ipynb | 2 +- original_template/Qwen2_5_7B_VL_GRPO.ipynb | 2 +- original_template/Qwen3_(4B)-GRPO.ipynb | 4 +- .../Qwen3_VL_(8B)-Vision-GRPO.ipynb | 4 +- original_template/gpt-oss-(20B)-GRPO.ipynb | 2 +- .../gpt_oss_(20B)_GRPO_BF16.ipynb | 2 +- .../Advanced_Llama3_2_(3B)_GRPO_LoRA.py | 2 +- .../DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py | 2 +- python_scripts/Gemma3_(1B)-GRPO.py | 4 +- python_scripts/Gemma3_(4B)-Vision-GRPO.py | 4 +- ...Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py | 2 +- ...Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py | 2 +- ...Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py | 2 +- .../HuggingFace Course-Gemma3_(1B)-GRPO.py | 4 +- ...gingFace Course-Gemma3_(4B)-Vision-GRPO.py | 4 +- .../HuggingFace Course-Llama3.1_(8B)-GRPO.py | 4 +- ...ggingFace Course-Mistral_v0.3_(7B)-GRPO.py | 2 +- .../HuggingFace Course-Phi_4_(14B)-GRPO.py | 2 +- .../HuggingFace Course-Qwen2.5_(3B)-GRPO.py | 2 +- .../HuggingFace Course-Qwen2_5_7B_VL_GRPO.py | 2 +- .../HuggingFace Course-Qwen3_(4B)-GRPO.py | 4 +- ...ngFace 
Course-Qwen3_VL_(8B)-Vision-GRPO.py | 4 +- .../HuggingFace Course-gpt-oss-(20B)-GRPO.py | 2 +- ...gingFace Course-gpt_oss_(20B)_GRPO_BF16.py | 2 +- ...Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py | 2 +- ...Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py | 2 +- ...Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py | 2 +- python_scripts/Kaggle-Gemma3_(1B)-GRPO.py | 4 +- .../Kaggle-Gemma3_(4B)-Vision-GRPO.py | 4 +- python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py | 4 +- .../Kaggle-Mistral_v0.3_(7B)-GRPO.py | 2 +- python_scripts/Kaggle-Phi_4_(14B)-GRPO.py | 2 +- python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py | 2 +- python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py | 2 +- python_scripts/Kaggle-Qwen3_(4B)-GRPO.py | 4 +- .../Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py | 4 +- python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py | 2 +- .../Kaggle-gpt_oss_(20B)_GRPO_BF16.py | 2 +- python_scripts/Llama3.1_(8B)-GRPO.py | 4 +- python_scripts/Mistral_v0.3_(7B)-GRPO.py | 2 +- python_scripts/Phi_4_(14B)-GRPO.py | 2 +- python_scripts/Qwen2.5_(3B)-GRPO.py | 2 +- python_scripts/Qwen2_5_7B_VL_GRPO.py | 2 +- python_scripts/Qwen3_(4B)-GRPO.py | 4 +- python_scripts/gpt-oss-(20B)-GRPO.py | 2 +- python_scripts/gpt_oss_(20B)_GRPO_BF16.py | 2 +- 94 files changed, 346 insertions(+), 420 deletions(-) diff --git a/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb index 4e990e33..7cbaeea4 100644 --- a/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb +++ b/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb @@ -3,8 +3,8 @@ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" @@ -1257,7 +1257,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 4, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", @@ -12746,8 +12746,8 @@ "accelerator": "GPU", "colab": { "gpuType": "T4", - "provenance": [], - "include_colab_link": true + "include_colab_link": true, + "provenance": [] }, "kernelspec": { "display_name": "Python 3", diff --git a/nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb b/nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb index 30b813a1..66f059c7 100644 --- a/nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb +++ b/nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb @@ -1711,7 +1711,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Gemma3_(1B)-GRPO.ipynb b/nb/Gemma3_(1B)-GRPO.ipynb index 0405b1c4..e933b423 100644 --- a/nb/Gemma3_(1B)-GRPO.ipynb +++ b/nb/Gemma3_(1B)-GRPO.ipynb @@ -8,7 +8,7 @@ "
\n", "\n", "\n", - " Join Discord if you need help + ⭐ Star us on Github ⭐\n", + " Join Discord if you need help + \u2b50 Star us on Github \u2b50\n", "
\n", "\n", "To install Unsloth your local device, follow [our guide](https://docs.unsloth.ai/get-started/install-and-update). This notebook is licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n", @@ -51,42 +51,14 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "%%capture\n", - "import os\n", - "os.environ[\"UNSLOTH_VLLM_STANDBY\"] = \"1\" # [NEW] Extra 30% context lengths!\n", - "if \"COLAB_\" not in \"\".join(os.environ.keys()):\n", - " # If you're not in Colab, just use pip install or uv pip install\n", - " !pip install unsloth vllm\n", - "else:\n", - " pass # For Colab / Kaggle, we need extra instructions hidden below \\/" - ] + "source": "%%capture\nimport os\nos.environ[\"UNSLOTH_VLLM_STANDBY\"] = \"1\" # [NEW] Extra 30% context lengths!\nif \"COLAB_\" not in \"\".join(os.environ.keys()):\n # If you're not in Colab, just use pip install or uv pip install\n !pip install unsloth vllm\nelse:\n pass # For Colab / Kaggle, we need extra instructions hidden below \\/" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "#@title Colab Extra Install { display-mode: \"form\" }\n", - "%%capture\n", - "import os\n", - "!pip install --upgrade -qqq uv\n", - "if \"COLAB_\" not in \"\".join(os.environ.keys()):\n", - " # If you're not in Colab, just use pip install!\n", - " !pip install unsloth vllm\n", - "else:\n", - " try: import numpy, PIL; get_numpy = f\"numpy=={numpy.__version__}\"; get_pil = f\"pillow=={PIL.__version__}\"\n", - " except: get_numpy = \"numpy\"; get_pil = \"pillow\"\n", - " try: import subprocess; is_t4 = \"Tesla T4\" in str(subprocess.check_output([\"nvidia-smi\"]))\n", - " except: is_t4 = False\n", - " get_vllm, get_triton = (\"vllm==0.9.2\", \"triton==3.2.0\") if is_t4 else (\"vllm==0.10.2\", \"triton\")\n", - " !uv pip install -qqq --upgrade \\\n", - " unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers\n", - " !uv pip install -qqq {get_triton}\n", - "!uv pip install transformers==4.56.2\n", - "!uv pip install --no-deps trl==0.22.2" - ] + "source": "#@title Colab Extra Install { display-mode: \"form\" }\n%%capture\nimport os\n!pip install --upgrade -qqq uv\nif \"COLAB_\" not in \"\".join(os.environ.keys()):\n # If you're not in Colab, just use pip install!\n !pip install unsloth vllm\nelse:\n try: import numpy, PIL; get_numpy = f\"numpy=={numpy.__version__}\"; get_pil = f\"pillow=={PIL.__version__}\"\n except: get_numpy = \"numpy\"; get_pil = \"pillow\"\n try: import subprocess; is_t4 = \"Tesla T4\" in str(subprocess.check_output([\"nvidia-smi\"]))\n except: is_t4 = False\n get_vllm, get_triton = (\"vllm==0.9.2\", \"triton==3.2.0\") if is_t4 else (\"vllm==0.10.2\", \"triton\")\n !uv pip install -qqq --upgrade \\\n unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers\n !uv pip install -qqq {get_triton}\n!uv pip install transformers==4.56.2\n!uv pip install --no-deps trl==0.22.2" }, { "cell_type": "markdown", @@ -201,8 +173,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "πŸ¦₯ Unsloth Zoo will now patch everything to make training faster!\n", + "\ud83e\udda5 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", + "\ud83e\udda5 Unsloth Zoo will now patch everything to make training faster!\n", "INFO 03-19 15:51:40 [__init__.py:256] Automatically detected platform cuda.\n", "==((====))== Unsloth 2025.3.17: 
Fast Gemma3 patching. Transformers: 4.50.0.dev0. vLLM: 0.8.0.\n", " \\\\ /| Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.\n", @@ -1060,7 +1032,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1091,7 +1063,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_torch_fused\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", @@ -1395,15 +1367,15 @@ "Loan amount: $480,000\n", "Interest rate: We need to assume an interest rate for this problem. Let's assume an annual interest rate of 6% (this is a common rate).\n", "Loan term: 20 years, so 20 * 12 = 240 months\n", - "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n – 1]\n", + "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n \u2013 1]\n", "where M is the monthly payment, P is the loan amount, i is the monthly interest rate, and n is the number of months.\n", "\n", "Monthly interest rate (i) = Annual interest rate / 12 = 0.06 / 12 = 0.005\n", "Number of months (n) = 240\n", "\n", - "M = 480000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 – 1]\n", - "M = 480000 [ 0.005(1.005)^240 ] / [ (1.005)^240 – 1]\n", - "M = 480000 [ 0.005 * 3.310853] / [ 3.310853 – 1]\n", + "M = 480000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 \u2013 1]\n", + "M = 480000 [ 0.005(1.005)^240 ] / [ (1.005)^240 \u2013 1]\n", + "M = 480000 [ 0.005 * 3.310853] / [ 3.310853 \u2013 1]\n", "M = 480000 [ 0.016554265] / [2.310853]\n", "M = 480000 * 0.00703658\n", "M = $331.54\n", @@ -1412,15 +1384,15 @@ "Loan amount: $120,000\n", "Interest rate: We still assume an annual interest rate of 6% (this is a common rate).\n", "Loan term: 20 years, so 20 * 12 = 240 months\n", - "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n – 1]\n", + "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n \u2013 1]\n", "where M is the monthly payment, P is the loan amount, i is the monthly interest rate, and n is the number of months.\n", "\n", "Monthly interest rate (i) = Annual interest rate / 12 = 0.06 / 12 = 0.005\n", "Number of months (n) = 240\n", "\n", - "M = 120000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 – 1]\n", - "M = 120000 [ 0.005(1.005)^240 ] / [ (1.005)^240 – 1]\n", - "M = 120000 [ 0.005 * 3.310853 ] / [ 3.310853 – 1]\n", + "M = 120000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 \u2013 1]\n", + "M = 120000 [ 0.005(1.005)^240 ] / [ (1.005)^240 \u2013 1]\n", + "M = 120000 [ 0.005 * 3.310853 ] / [ 3.310853 \u2013 1]\n", "M = 120000 [ 0.016554265] / [2.310853]\n", "M = 1200 \n", "Extracted:\n", @@ -1985,10 +1957,9 @@ " \n", " \n", "\n", - " Join Discord if you need help + ⭐️ Star us on Github ⭐️\n", + " Join Discord if you need help + \u2b50\ufe0f Star us on Github \u2b50\ufe0f\n", "\n", - "\n", - " This notebook and all Unsloth notebooks are licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n" + "\n This notebook and all Unsloth notebooks are licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n" ] } ], @@ -2266,9 +2237,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_551b94fe4b3c4a4f8f3220d013a6d897", - "placeholder": "​", + 
"placeholder": "\u200b", "style": "IPY_MODEL_29d36346bc75470eacd30aebf1423e14", - "value": "tokenizer.json: 100%" + "value": "tokenizer.json:\u2007100%" } }, "0ef32700424c4799b8216de4ed8bbbb9": { @@ -2378,9 +2349,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_383d2f04eccd4e38ad791494a06423ed", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_36cabdee7cd645a58e1ffc794674a322", - "value": "special_tokens_map.json: 100%" + "value": "special_tokens_map.json:\u2007100%" } }, "1664b23faa1b4292bb5727bd525c45be": { @@ -2473,9 +2444,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_772360d1551141c5ab0a877ae4ed1c76", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_b0c51c819fff44c5a6e8e626fec9e937", - "value": "tokenizer_config.json: 100%" + "value": "tokenizer_config.json:\u2007100%" } }, "19731cf654e64eb3905f02f4ae277e8c": { @@ -2870,9 +2841,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6decae16dd5b404ba272d89df9c1372b", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_9151ae8e05634cf4a8ce42677f211bac", - "value": "Map: 100%" + "value": "Map:\u2007100%" } }, "340c07f1b7b14527a378880f9166ef55": { @@ -3028,9 +2999,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_671b4725f18a4c809b3c97a65ec9e405", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_faa61469d4bc46aebe74d5401d42c3ab", - "value": " 35.0/35.0 [00:00<00:00, 2.66kB/s]" + "value": "\u200735.0/35.0\u2007[00:00<00:00,\u20072.66kB/s]" } }, "3fabb349d1f943b09ed25784a0d0ab0a": { @@ -3353,9 +3324,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cf83a6e558f64ff98276981a82f3b2ac", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_0ef32700424c4799b8216de4ed8bbbb9", - "value": "test-00000-of-00001.parquet: 100%" + "value": "test-00000-of-00001.parquet:\u2007100%" } }, "50503005b58845b79254f89f95fb03d9": { @@ -3398,9 +3369,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d1e38bbf7593462bb87b14bacee9e3d9", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_846c1dd9b3214c68bbfc362166a6b9ee", - "value": " 7473/7473 [00:00<00:00, 5203.68 examples/s]" + "value": "\u20077473/7473\u2007[00:00<00:00,\u20075203.68\u2007examples/s]" } }, "54c603df4e174b70bd03bce59de287b2": { @@ -3511,9 +3482,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9677dc2a2f4847d89d3ca42c01435205", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_645b707de96a4c2eb07eb118db311fb6", - "value": "Generating train split: 100%" + "value": "Generating\u2007train\u2007split:\u2007100%" } }, "61d7ca5d56f14d7d93d5cf5e5b712da2": { @@ -4060,9 +4031,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_669cb00554134e8287d0daae7372d397", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_9e7bb56731134d9cba89cbde208358e0", - "value": "train-00000-of-00001.parquet: 100%" + "value": "train-00000-of-00001.parquet:\u2007100%" } }, "7c4cd321445b43bfa818b599d42c8cb7": { @@ -4081,9 +4052,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cec948b41ee348fcbdf2f5fa290ffe42", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_6d1b19657db94907b6fdf0a3b3a05e84", - "value": "README.md: 100%" + "value": "README.md:\u2007100%" } }, "7d4b49f0c54046a89039d25ee7c11f6f": { @@ -4258,9 +4229,9 @@ "description": "", "description_tooltip": 
null, "layout": "IPY_MODEL_1da84b32592d432fb1a57358ddcdceff", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_bbcd20b75ce445bea7be9597efa68d73", - "value": " 33.4M/33.4M [00:00<00:00, 144MB/s]" + "value": "\u200733.4M/33.4M\u2007[00:00<00:00,\u2007144MB/s]" } }, "8464d2f310b045808bcd7206aa7c8cc5": { @@ -4309,9 +4280,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_70a0b21c05e642e5891bb8b91cfb2217", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_444aa81458bf4790915c188069506864", - "value": "tokenizer.model: 100%" + "value": "tokenizer.model:\u2007100%" } }, "859b75a69281413383fc1c0946bf63d0": { @@ -4361,9 +4332,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_02d6b8e9a02b470a92f700d9e7fea5d4", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8464d2f310b045808bcd7206aa7c8cc5", - "value": "Generating test split: 100%" + "value": "Generating\u2007test\u2007split:\u2007100%" } }, "8a21cb24786049e2946c9a050a6673ab": { @@ -4434,9 +4405,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0c96fa0b7ed344f4be44632451b406d7", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_bf96f070c2f54468a674f11681fcb22d", - "value": " 2.31M/2.31M [00:00<00:00, 17.7MB/s]" + "value": "\u20072.31M/2.31M\u2007[00:00<00:00,\u200717.7MB/s]" } }, "8ca7d620dc5d415a83b50f134317d925": { @@ -4828,9 +4799,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_67eda18557dc461483821ffd44a25eb4", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_4cabdb178b97464e875d507dd1410bbf", - "value": " 7.94k/7.94k [00:00<00:00, 179kB/s]" + "value": "\u20077.94k/7.94k\u2007[00:00<00:00,\u2007179kB/s]" } }, "9e7bb56731134d9cba89cbde208358e0": { @@ -5050,9 +5021,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_def9dbef26334436a6b2298b5b153f47", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_eea838fbfd2c4e82b1769fde1036487e", - "value": " 4.69M/4.69M [00:00<00:00, 26.2MB/s]" + "value": "\u20074.69M/4.69M\u2007[00:00<00:00,\u200726.2MB/s]" } }, "a9dbcb0e164544ba8321a7916500009d": { @@ -5227,9 +5198,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_45aa2714bf0b44e9af7668c674f63863", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8ca7d620dc5d415a83b50f134317d925", - "value": " 215/215 [00:00<00:00, 13.4kB/s]" + "value": "\u2007215/215\u2007[00:00<00:00,\u200713.4kB/s]" } }, "b0c51c819fff44c5a6e8e626fec9e937": { @@ -5263,9 +5234,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d9e72108a08b447ea2a29932391fe429", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_21366789f69d431bad2ac2c8d1ec1565", - "value": " 2.00G/2.00G [00:15<00:00, 71.2MB/s]" + "value": "\u20072.00G/2.00G\u2007[00:15<00:00,\u200771.2MB/s]" } }, "b67486cb78274f2baa4f9afdc2fd7e3c": { @@ -5284,9 +5255,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_21f2aec6d7af4c3a82306ad235674829", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_fd6ff32a3aa7479e8b5d0cda47819742", - "value": "model.safetensors: 100%" + "value": "model.safetensors:\u2007100%" } }, "ba4ac2596dda42698ae048f9b7a11c61": { @@ -5865,9 +5836,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8029855dfa614963a7f5ff11d48dcbdb", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_e74fcfb683074f27806b6aaae329b74d", - 
"value": " 670/670 [00:00<00:00, 74.3kB/s]" + "value": "\u2007670/670\u2007[00:00<00:00,\u200774.3kB/s]" } }, "d7b48dafe15947c9b225681c4a326581": { @@ -5975,9 +5946,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a9dbcb0e164544ba8321a7916500009d", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_a6030d7b5bbd460fb5aa1356c607ec82", - "value": " 419k/419k [00:00<00:00, 9.62MB/s]" + "value": "\u2007419k/419k\u2007[00:00<00:00,\u20079.62MB/s]" } }, "def9dbef26334436a6b2298b5b153f47": { @@ -6115,9 +6086,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_25c7874a020344e7aad2869f7a27db4e", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_86e9660b2d72460c836d9bba348be56b", - "value": " 1319/1319 [00:00<00:00, 12220.76 examples/s]" + "value": "\u20071319/1319\u2007[00:00<00:00,\u200712220.76\u2007examples/s]" } }, "ed53e921682348b28e2be40eaf96cbf7": { @@ -6160,9 +6131,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_185b378797334c819f8a199760cac945", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_a1fced067e1e40cbb08e25c9377123e0", - "value": "generation_config.json: 100%" + "value": "generation_config.json:\u2007100%" } }, "eea838fbfd2c4e82b1769fde1036487e": { @@ -6196,9 +6167,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_45eda31f22294728bbca360abad799e6", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_d7b48dafe15947c9b225681c4a326581", - "value": " 1.16M/1.16M [00:00<00:00, 6.41MB/s]" + "value": "\u20071.16M/1.16M\u2007[00:00<00:00,\u20076.41MB/s]" } }, "efaa563da24149aaaa153b7e8c473394": { @@ -6376,9 +6347,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_aaf739645f654cd6a94d897358b2c2e0", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_f0c1afb62d8a4616a8df27c9de866916", - "value": "added_tokens.json: 100%" + "value": "added_tokens.json:\u2007100%" } }, "f6a520e1570d4f7faffc1b6e2c6200d2": { @@ -6397,9 +6368,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_10c20527dc19466eb4d6eb325529a0df", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8f2974213b954318902d39a1fda9b3fb", - "value": " 7473/7473 [00:00<00:00, 9263.20 examples/s]" + "value": "\u20077473/7473\u2007[00:00<00:00,\u20079263.20\u2007examples/s]" } }, "f73516b2403f411d925618cfa2af5f46": { @@ -6462,4 +6433,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file diff --git a/nb/Gemma3_(4B)-Vision-GRPO.ipynb b/nb/Gemma3_(4B)-Vision-GRPO.ipynb index 7f1b8d60..ece4d662 100644 --- a/nb/Gemma3_(4B)-Vision-GRPO.ipynb +++ b/nb/Gemma3_(4B)-Vision-GRPO.ipynb @@ -1114,7 +1114,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1144,7 +1144,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb b/nb/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb index 1db244ca..46b67f4c 100644 --- a/nb/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb +++ b/nb/HuggingFace 
Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb @@ -819,7 +819,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_torch_fused\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 4, # Increase to 4 for smoother training\n", " num_generations = 8, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb index 4e990e33..7cbaeea4 100644 --- a/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb +++ b/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb @@ -3,8 +3,8 @@ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" @@ -1257,7 +1257,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 4, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", @@ -12746,8 +12746,8 @@ "accelerator": "GPU", "colab": { "gpuType": "T4", - "provenance": [], - "include_colab_link": true + "include_colab_link": true, + "provenance": [] }, "kernelspec": { "display_name": "Python 3", diff --git a/nb/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb b/nb/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb index 87101e65..aef03031 100644 --- a/nb/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb +++ b/nb/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb @@ -1713,7 +1713,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb b/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb index 7ea19cff..a8f7474e 100644 --- a/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb @@ -8,7 +8,7 @@ "
\n", "\n", "\n", - " Join Discord if you need help + ⭐ Star us on Github ⭐\n", + " Join Discord if you need help + \u2b50 Star us on Github \u2b50\n", "
\n", "\n", "In this [Hugging Face](https://huggingface.co/learn/nlp-course/en/chapter12/6?fw=pt) and Unsloth notebook, you will learn to transform Gemma3 (1B) GRPO into a Reasoning model using GRPO.\n", @@ -53,42 +53,14 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "%%capture\n", - "import os\n", - "os.environ[\"UNSLOTH_VLLM_STANDBY\"] = \"1\" # [NEW] Extra 30% context lengths!\n", - "if \"COLAB_\" not in \"\".join(os.environ.keys()):\n", - " # If you're not in Colab, just use pip install or uv pip install\n", - " !pip install unsloth vllm\n", - "else:\n", - " pass # For Colab / Kaggle, we need extra instructions hidden below \\/" - ] + "source": "%%capture\nimport os\nos.environ[\"UNSLOTH_VLLM_STANDBY\"] = \"1\" # [NEW] Extra 30% context lengths!\nif \"COLAB_\" not in \"\".join(os.environ.keys()):\n # If you're not in Colab, just use pip install or uv pip install\n !pip install unsloth vllm\nelse:\n pass # For Colab / Kaggle, we need extra instructions hidden below \\/" }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "#@title Colab Extra Install { display-mode: \"form\" }\n", - "%%capture\n", - "import os\n", - "!pip install --upgrade -qqq uv\n", - "if \"COLAB_\" not in \"\".join(os.environ.keys()):\n", - " # If you're not in Colab, just use pip install!\n", - " !pip install unsloth vllm\n", - "else:\n", - " try: import numpy, PIL; get_numpy = f\"numpy=={numpy.__version__}\"; get_pil = f\"pillow=={PIL.__version__}\"\n", - " except: get_numpy = \"numpy\"; get_pil = \"pillow\"\n", - " try: import subprocess; is_t4 = \"Tesla T4\" in str(subprocess.check_output([\"nvidia-smi\"]))\n", - " except: is_t4 = False\n", - " get_vllm, get_triton = (\"vllm==0.9.2\", \"triton==3.2.0\") if is_t4 else (\"vllm==0.10.2\", \"triton\")\n", - " !uv pip install -qqq --upgrade \\\n", - " unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers\n", - " !uv pip install -qqq {get_triton}\n", - "!uv pip install transformers==4.56.2\n", - "!uv pip install --no-deps trl==0.22.2" - ] + "source": "#@title Colab Extra Install { display-mode: \"form\" }\n%%capture\nimport os\n!pip install --upgrade -qqq uv\nif \"COLAB_\" not in \"\".join(os.environ.keys()):\n # If you're not in Colab, just use pip install!\n !pip install unsloth vllm\nelse:\n try: import numpy, PIL; get_numpy = f\"numpy=={numpy.__version__}\"; get_pil = f\"pillow=={PIL.__version__}\"\n except: get_numpy = \"numpy\"; get_pil = \"pillow\"\n try: import subprocess; is_t4 = \"Tesla T4\" in str(subprocess.check_output([\"nvidia-smi\"]))\n except: is_t4 = False\n get_vllm, get_triton = (\"vllm==0.9.2\", \"triton==3.2.0\") if is_t4 else (\"vllm==0.10.2\", \"triton\")\n !uv pip install -qqq --upgrade \\\n unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers\n !uv pip install -qqq {get_triton}\n!uv pip install transformers==4.56.2\n!uv pip install --no-deps trl==0.22.2" }, { "cell_type": "markdown", @@ -203,8 +175,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "πŸ¦₯ Unsloth Zoo will now patch everything to make training faster!\n", + "\ud83e\udda5 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", + "\ud83e\udda5 Unsloth Zoo will now patch everything to make training faster!\n", "INFO 03-19 15:51:40 [__init__.py:256] Automatically detected platform cuda.\n", "==((====))== Unsloth 2025.3.17: Fast Gemma3 patching. 
Transformers: 4.50.0.dev0. vLLM: 0.8.0.\n", " \\\\ /| Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.\n", @@ -1062,7 +1034,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1093,7 +1065,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_torch_fused\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", @@ -1397,15 +1369,15 @@ "Loan amount: $480,000\n", "Interest rate: We need to assume an interest rate for this problem. Let's assume an annual interest rate of 6% (this is a common rate).\n", "Loan term: 20 years, so 20 * 12 = 240 months\n", - "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n – 1]\n", + "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n \u2013 1]\n", "where M is the monthly payment, P is the loan amount, i is the monthly interest rate, and n is the number of months.\n", "\n", "Monthly interest rate (i) = Annual interest rate / 12 = 0.06 / 12 = 0.005\n", "Number of months (n) = 240\n", "\n", - "M = 480000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 – 1]\n", - "M = 480000 [ 0.005(1.005)^240 ] / [ (1.005)^240 – 1]\n", - "M = 480000 [ 0.005 * 3.310853] / [ 3.310853 – 1]\n", + "M = 480000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 \u2013 1]\n", + "M = 480000 [ 0.005(1.005)^240 ] / [ (1.005)^240 \u2013 1]\n", + "M = 480000 [ 0.005 * 3.310853] / [ 3.310853 \u2013 1]\n", "M = 480000 [ 0.016554265] / [2.310853]\n", "M = 480000 * 0.00703658\n", "M = $331.54\n", @@ -1414,15 +1386,15 @@ "Loan amount: $120,000\n", "Interest rate: We still assume an annual interest rate of 6% (this is a common rate).\n", "Loan term: 20 years, so 20 * 12 = 240 months\n", - "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n – 1]\n", + "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n \u2013 1]\n", "where M is the monthly payment, P is the loan amount, i is the monthly interest rate, and n is the number of months.\n", "\n", "Monthly interest rate (i) = Annual interest rate / 12 = 0.06 / 12 = 0.005\n", "Number of months (n) = 240\n", "\n", - "M = 120000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 – 1]\n", - "M = 120000 [ 0.005(1.005)^240 ] / [ (1.005)^240 – 1]\n", - "M = 120000 [ 0.005 * 3.310853 ] / [ 3.310853 – 1]\n", + "M = 120000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 \u2013 1]\n", + "M = 120000 [ 0.005(1.005)^240 ] / [ (1.005)^240 \u2013 1]\n", + "M = 120000 [ 0.005 * 3.310853 ] / [ 3.310853 \u2013 1]\n", "M = 120000 [ 0.016554265] / [2.310853]\n", "M = 1200 \n", "Extracted:\n", @@ -1987,10 +1959,9 @@ " \n", " \n", "\n", - " Join Discord if you need help + ⭐️ Star us on Github ⭐️\n", + " Join Discord if you need help + \u2b50\ufe0f Star us on Github \u2b50\ufe0f\n", "\n", - "\n", - " This notebook and all Unsloth notebooks are licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n" + "\n This notebook and all Unsloth notebooks are licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n" ] } ], @@ -2268,9 +2239,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_551b94fe4b3c4a4f8f3220d013a6d897", - "placeholder": "​", + "placeholder": "\u200b", "style": 
"IPY_MODEL_29d36346bc75470eacd30aebf1423e14", - "value": "tokenizer.json: 100%" + "value": "tokenizer.json:\u2007100%" } }, "0ef32700424c4799b8216de4ed8bbbb9": { @@ -2380,9 +2351,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_383d2f04eccd4e38ad791494a06423ed", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_36cabdee7cd645a58e1ffc794674a322", - "value": "special_tokens_map.json: 100%" + "value": "special_tokens_map.json:\u2007100%" } }, "1664b23faa1b4292bb5727bd525c45be": { @@ -2475,9 +2446,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_772360d1551141c5ab0a877ae4ed1c76", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_b0c51c819fff44c5a6e8e626fec9e937", - "value": "tokenizer_config.json: 100%" + "value": "tokenizer_config.json:\u2007100%" } }, "19731cf654e64eb3905f02f4ae277e8c": { @@ -2872,9 +2843,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6decae16dd5b404ba272d89df9c1372b", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_9151ae8e05634cf4a8ce42677f211bac", - "value": "Map: 100%" + "value": "Map:\u2007100%" } }, "340c07f1b7b14527a378880f9166ef55": { @@ -3030,9 +3001,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_671b4725f18a4c809b3c97a65ec9e405", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_faa61469d4bc46aebe74d5401d42c3ab", - "value": " 35.0/35.0 [00:00<00:00, 2.66kB/s]" + "value": "\u200735.0/35.0\u2007[00:00<00:00,\u20072.66kB/s]" } }, "3fabb349d1f943b09ed25784a0d0ab0a": { @@ -3355,9 +3326,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cf83a6e558f64ff98276981a82f3b2ac", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_0ef32700424c4799b8216de4ed8bbbb9", - "value": "test-00000-of-00001.parquet: 100%" + "value": "test-00000-of-00001.parquet:\u2007100%" } }, "50503005b58845b79254f89f95fb03d9": { @@ -3400,9 +3371,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d1e38bbf7593462bb87b14bacee9e3d9", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_846c1dd9b3214c68bbfc362166a6b9ee", - "value": " 7473/7473 [00:00<00:00, 5203.68 examples/s]" + "value": "\u20077473/7473\u2007[00:00<00:00,\u20075203.68\u2007examples/s]" } }, "54c603df4e174b70bd03bce59de287b2": { @@ -3513,9 +3484,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9677dc2a2f4847d89d3ca42c01435205", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_645b707de96a4c2eb07eb118db311fb6", - "value": "Generating train split: 100%" + "value": "Generating\u2007train\u2007split:\u2007100%" } }, "61d7ca5d56f14d7d93d5cf5e5b712da2": { @@ -4062,9 +4033,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_669cb00554134e8287d0daae7372d397", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_9e7bb56731134d9cba89cbde208358e0", - "value": "train-00000-of-00001.parquet: 100%" + "value": "train-00000-of-00001.parquet:\u2007100%" } }, "7c4cd321445b43bfa818b599d42c8cb7": { @@ -4083,9 +4054,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cec948b41ee348fcbdf2f5fa290ffe42", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_6d1b19657db94907b6fdf0a3b3a05e84", - "value": "README.md: 100%" + "value": "README.md:\u2007100%" } }, "7d4b49f0c54046a89039d25ee7c11f6f": { @@ -4260,9 +4231,9 @@ "description": "", "description_tooltip": null, "layout": 
"IPY_MODEL_1da84b32592d432fb1a57358ddcdceff", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_bbcd20b75ce445bea7be9597efa68d73", - "value": " 33.4M/33.4M [00:00<00:00, 144MB/s]" + "value": "\u200733.4M/33.4M\u2007[00:00<00:00,\u2007144MB/s]" } }, "8464d2f310b045808bcd7206aa7c8cc5": { @@ -4311,9 +4282,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_70a0b21c05e642e5891bb8b91cfb2217", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_444aa81458bf4790915c188069506864", - "value": "tokenizer.model: 100%" + "value": "tokenizer.model:\u2007100%" } }, "859b75a69281413383fc1c0946bf63d0": { @@ -4363,9 +4334,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_02d6b8e9a02b470a92f700d9e7fea5d4", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8464d2f310b045808bcd7206aa7c8cc5", - "value": "Generating test split: 100%" + "value": "Generating\u2007test\u2007split:\u2007100%" } }, "8a21cb24786049e2946c9a050a6673ab": { @@ -4436,9 +4407,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0c96fa0b7ed344f4be44632451b406d7", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_bf96f070c2f54468a674f11681fcb22d", - "value": " 2.31M/2.31M [00:00<00:00, 17.7MB/s]" + "value": "\u20072.31M/2.31M\u2007[00:00<00:00,\u200717.7MB/s]" } }, "8ca7d620dc5d415a83b50f134317d925": { @@ -4830,9 +4801,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_67eda18557dc461483821ffd44a25eb4", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_4cabdb178b97464e875d507dd1410bbf", - "value": " 7.94k/7.94k [00:00<00:00, 179kB/s]" + "value": "\u20077.94k/7.94k\u2007[00:00<00:00,\u2007179kB/s]" } }, "9e7bb56731134d9cba89cbde208358e0": { @@ -5052,9 +5023,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_def9dbef26334436a6b2298b5b153f47", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_eea838fbfd2c4e82b1769fde1036487e", - "value": " 4.69M/4.69M [00:00<00:00, 26.2MB/s]" + "value": "\u20074.69M/4.69M\u2007[00:00<00:00,\u200726.2MB/s]" } }, "a9dbcb0e164544ba8321a7916500009d": { @@ -5229,9 +5200,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_45aa2714bf0b44e9af7668c674f63863", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8ca7d620dc5d415a83b50f134317d925", - "value": " 215/215 [00:00<00:00, 13.4kB/s]" + "value": "\u2007215/215\u2007[00:00<00:00,\u200713.4kB/s]" } }, "b0c51c819fff44c5a6e8e626fec9e937": { @@ -5265,9 +5236,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d9e72108a08b447ea2a29932391fe429", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_21366789f69d431bad2ac2c8d1ec1565", - "value": " 2.00G/2.00G [00:15<00:00, 71.2MB/s]" + "value": "\u20072.00G/2.00G\u2007[00:15<00:00,\u200771.2MB/s]" } }, "b67486cb78274f2baa4f9afdc2fd7e3c": { @@ -5286,9 +5257,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_21f2aec6d7af4c3a82306ad235674829", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_fd6ff32a3aa7479e8b5d0cda47819742", - "value": "model.safetensors: 100%" + "value": "model.safetensors:\u2007100%" } }, "ba4ac2596dda42698ae048f9b7a11c61": { @@ -5867,9 +5838,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8029855dfa614963a7f5ff11d48dcbdb", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_e74fcfb683074f27806b6aaae329b74d", - "value": " 
670/670 [00:00<00:00, 74.3kB/s]" + "value": "\u2007670/670\u2007[00:00<00:00,\u200774.3kB/s]" } }, "d7b48dafe15947c9b225681c4a326581": { @@ -5977,9 +5948,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a9dbcb0e164544ba8321a7916500009d", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_a6030d7b5bbd460fb5aa1356c607ec82", - "value": " 419k/419k [00:00<00:00, 9.62MB/s]" + "value": "\u2007419k/419k\u2007[00:00<00:00,\u20079.62MB/s]" } }, "def9dbef26334436a6b2298b5b153f47": { @@ -6117,9 +6088,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_25c7874a020344e7aad2869f7a27db4e", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_86e9660b2d72460c836d9bba348be56b", - "value": " 1319/1319 [00:00<00:00, 12220.76 examples/s]" + "value": "\u20071319/1319\u2007[00:00<00:00,\u200712220.76\u2007examples/s]" } }, "ed53e921682348b28e2be40eaf96cbf7": { @@ -6162,9 +6133,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_185b378797334c819f8a199760cac945", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_a1fced067e1e40cbb08e25c9377123e0", - "value": "generation_config.json: 100%" + "value": "generation_config.json:\u2007100%" } }, "eea838fbfd2c4e82b1769fde1036487e": { @@ -6198,9 +6169,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_45eda31f22294728bbca360abad799e6", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_d7b48dafe15947c9b225681c4a326581", - "value": " 1.16M/1.16M [00:00<00:00, 6.41MB/s]" + "value": "\u20071.16M/1.16M\u2007[00:00<00:00,\u20076.41MB/s]" } }, "efaa563da24149aaaa153b7e8c473394": { @@ -6378,9 +6349,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_aaf739645f654cd6a94d897358b2c2e0", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_f0c1afb62d8a4616a8df27c9de866916", - "value": "added_tokens.json: 100%" + "value": "added_tokens.json:\u2007100%" } }, "f6a520e1570d4f7faffc1b6e2c6200d2": { @@ -6399,9 +6370,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_10c20527dc19466eb4d6eb325529a0df", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8f2974213b954318902d39a1fda9b3fb", - "value": " 7473/7473 [00:00<00:00, 9263.20 examples/s]" + "value": "\u20077473/7473\u2007[00:00<00:00,\u20079263.20\u2007examples/s]" } }, "f73516b2403f411d925618cfa2af5f46": { @@ -6464,4 +6435,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file diff --git a/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb b/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb index 0993902d..b4dcef32 100644 --- a/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb +++ b/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb @@ -1116,7 +1116,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1146,7 +1146,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb b/nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb index 49fdfd4c..abb1f8e1 100644 --- a/nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb +++ b/nb/HuggingFace 
Course-Llama3.1_(8B)-GRPO.ipynb @@ -739,7 +739,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -770,7 +770,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb b/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb index cea2ec93..02489321 100644 --- a/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb @@ -614,7 +614,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb b/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb index cc4f3d6c..41da309c 100644 --- a/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb @@ -392,7 +392,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb b/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb index be8cb875..7c98ce5e 100644 --- a/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb @@ -909,7 +909,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 8, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb b/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb index 304290c2..6fc7c400 100644 --- a/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb +++ b/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb @@ -1376,7 +1376,7 @@ " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", " log_completions = False,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb b/nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb index dc5679ca..1ae0f2a5 100644 --- a/nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb @@ -2614,7 +2614,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -2656,7 +2656,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " 
per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb b/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb index 568593a8..b9e7caf1 100644 --- a/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb +++ b/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb @@ -1278,7 +1278,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1308,7 +1308,7 @@ " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", " log_completions = False,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/HuggingFace Course-gpt-oss-(20B)-GRPO.ipynb b/nb/HuggingFace Course-gpt-oss-(20B)-GRPO.ipynb index 7dad4d8a..3a5c7f1d 100644 --- a/nb/HuggingFace Course-gpt-oss-(20B)-GRPO.ipynb +++ b/nb/HuggingFace Course-gpt-oss-(20B)-GRPO.ipynb @@ -1684,7 +1684,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb b/nb/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb index 81a336fa..b48eb85c 100644 --- a/nb/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb +++ b/nb/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb @@ -1812,7 +1812,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb b/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb index 61be2340..6da4f1b2 100644 --- a/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb +++ b/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb @@ -808,7 +808,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_torch_fused\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 4, # Increase to 4 for smoother training\n", " num_generations = 8, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb index 4b0944aa..ec200d0b 100644 --- a/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb +++ b/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb @@ -3,8 +3,8 @@ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" @@ -1248,7 +1248,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " 
per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 4, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", @@ -12737,8 +12737,8 @@ "accelerator": "GPU", "colab": { "gpuType": "T4", - "provenance": [], - "include_colab_link": true + "include_colab_link": true, + "provenance": [] }, "kernelspec": { "display_name": "Python 3", diff --git a/nb/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb b/nb/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb index 6af2eedb..8afb6453 100644 --- a/nb/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb +++ b/nb/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb @@ -1704,7 +1704,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb b/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb index 9e9f25e9..72adabe9 100644 --- a/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb +++ b/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb @@ -8,7 +8,7 @@ "
\n", "\n", "\n", - " Join Discord if you need help + ⭐ Star us on Github ⭐\n", + " Join Discord if you need help + \u2b50 Star us on Github \u2b50\n", "
\n", "\n", "To install Unsloth your local device, follow [our guide](https://docs.unsloth.ai/get-started/install-and-update). This notebook is licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n", @@ -51,22 +51,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "%%capture\n", - "import os\n", - "os.environ[\"UNSLOTH_VLLM_STANDBY\"] = \"1\" # [NEW] Extra 30% context lengths!\n", - "!pip install --upgrade -qqq uv\n", - "try: import numpy, PIL; get_numpy = f\"numpy=={numpy.__version__}\"; get_pil = f\"pillow=={PIL.__version__}\"\n", - "except: get_numpy = \"numpy\"; get_pil = \"pillow\"\n", - "try: import subprocess; is_t4 = \"Tesla T4\" in str(subprocess.check_output([\"nvidia-smi\"]))\n", - "except: is_t4 = False\n", - "get_vllm, get_triton = (\"vllm==0.9.2\", \"triton==3.2.0\") if is_t4 else (\"vllm==0.10.2\", \"triton\")\n", - "!uv pip install -qqq --upgrade unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers\n", - "!uv pip install -qqq {get_triton}\n", - "!uv pip install \"huggingface_hub>=0.34.0\" \"datasets>=3.4.1,<4.0.\n", - "!uv pip install transformers==4.56.2\n", - "!uv pip install --no-deps trl==0.22.2" - ] + "source": "%%capture\nimport os\nos.environ[\"UNSLOTH_VLLM_STANDBY\"] = \"1\" # [NEW] Extra 30% context lengths!\n!pip install --upgrade -qqq uv\ntry: import numpy, PIL; get_numpy = f\"numpy=={numpy.__version__}\"; get_pil = f\"pillow=={PIL.__version__}\"\nexcept: get_numpy = \"numpy\"; get_pil = \"pillow\"\ntry: import subprocess; is_t4 = \"Tesla T4\" in str(subprocess.check_output([\"nvidia-smi\"]))\nexcept: is_t4 = False\nget_vllm, get_triton = (\"vllm==0.9.2\", \"triton==3.2.0\") if is_t4 else (\"vllm==0.10.2\", \"triton\")\n!uv pip install -qqq --upgrade unsloth {get_vllm} {get_numpy} {get_pil} torchvision bitsandbytes xformers\n!uv pip install -qqq {get_triton}\n!uv pip install \"huggingface_hub>=0.34.0\" \"datasets>=3.4.1,<4.0.\n!uv pip install transformers==4.56.2\n!uv pip install --no-deps trl==0.22.2" }, { "cell_type": "markdown", @@ -181,8 +166,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "πŸ¦₯ Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", - "πŸ¦₯ Unsloth Zoo will now patch everything to make training faster!\n", + "\ud83e\udda5 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n", + "\ud83e\udda5 Unsloth Zoo will now patch everything to make training faster!\n", "INFO 03-19 15:51:40 [__init__.py:256] Automatically detected platform cuda.\n", "==((====))== Unsloth 2025.3.17: Fast Gemma3 patching. Transformers: 4.50.0.dev0. vLLM: 0.8.0.\n", " \\\\ /| Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.\n", @@ -1040,7 +1025,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1071,7 +1056,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_torch_fused\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", @@ -1375,15 +1360,15 @@ "Loan amount: $480,000\n", "Interest rate: We need to assume an interest rate for this problem. 
Let's assume an annual interest rate of 6% (this is a common rate).\n", "Loan term: 20 years, so 20 * 12 = 240 months\n", - "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n – 1]\n", + "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n \u2013 1]\n", "where M is the monthly payment, P is the loan amount, i is the monthly interest rate, and n is the number of months.\n", "\n", "Monthly interest rate (i) = Annual interest rate / 12 = 0.06 / 12 = 0.005\n", "Number of months (n) = 240\n", "\n", - "M = 480000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 – 1]\n", - "M = 480000 [ 0.005(1.005)^240 ] / [ (1.005)^240 – 1]\n", - "M = 480000 [ 0.005 * 3.310853] / [ 3.310853 – 1]\n", + "M = 480000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 \u2013 1]\n", + "M = 480000 [ 0.005(1.005)^240 ] / [ (1.005)^240 \u2013 1]\n", + "M = 480000 [ 0.005 * 3.310853] / [ 3.310853 \u2013 1]\n", "M = 480000 [ 0.016554265] / [2.310853]\n", "M = 480000 * 0.00703658\n", "M = $331.54\n", @@ -1392,15 +1377,15 @@ "Loan amount: $120,000\n", "Interest rate: We still assume an annual interest rate of 6% (this is a common rate).\n", "Loan term: 20 years, so 20 * 12 = 240 months\n", - "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n – 1]\n", + "We will use the loan payment formula: M = P [ i(1 + i)^n ] / [ (1 + i)^n \u2013 1]\n", "where M is the monthly payment, P is the loan amount, i is the monthly interest rate, and n is the number of months.\n", "\n", "Monthly interest rate (i) = Annual interest rate / 12 = 0.06 / 12 = 0.005\n", "Number of months (n) = 240\n", "\n", - "M = 120000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 – 1]\n", - "M = 120000 [ 0.005(1.005)^240 ] / [ (1.005)^240 – 1]\n", - "M = 120000 [ 0.005 * 3.310853 ] / [ 3.310853 – 1]\n", + "M = 120000 [ 0.005(1 + 0.005)^240 ] / [ (1 + 0.005)^240 \u2013 1]\n", + "M = 120000 [ 0.005(1.005)^240 ] / [ (1.005)^240 \u2013 1]\n", + "M = 120000 [ 0.005 * 3.310853 ] / [ 3.310853 \u2013 1]\n", "M = 120000 [ 0.016554265] / [2.310853]\n", "M = 1200 \n", "Extracted:\n", @@ -1965,10 +1950,9 @@ " \n", " \n", "\n", - " Join Discord if you need help + ⭐️ Star us on Github ⭐️\n", + " Join Discord if you need help + \u2b50\ufe0f Star us on Github \u2b50\ufe0f\n", "\n", - "\n", - " This notebook and all Unsloth notebooks are licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n" + "\n This notebook and all Unsloth notebooks are licensed [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).\n" ] } ], @@ -2246,9 +2230,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_551b94fe4b3c4a4f8f3220d013a6d897", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_29d36346bc75470eacd30aebf1423e14", - "value": "tokenizer.json: 100%" + "value": "tokenizer.json:\u2007100%" } }, "0ef32700424c4799b8216de4ed8bbbb9": { @@ -2358,9 +2342,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_383d2f04eccd4e38ad791494a06423ed", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_36cabdee7cd645a58e1ffc794674a322", - "value": "special_tokens_map.json: 100%" + "value": "special_tokens_map.json:\u2007100%" } }, "1664b23faa1b4292bb5727bd525c45be": { @@ -2453,9 +2437,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_772360d1551141c5ab0a877ae4ed1c76", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_b0c51c819fff44c5a6e8e626fec9e937", - "value": 
"tokenizer_config.json: 100%" + "value": "tokenizer_config.json:\u2007100%" } }, "19731cf654e64eb3905f02f4ae277e8c": { @@ -2850,9 +2834,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6decae16dd5b404ba272d89df9c1372b", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_9151ae8e05634cf4a8ce42677f211bac", - "value": "Map: 100%" + "value": "Map:\u2007100%" } }, "340c07f1b7b14527a378880f9166ef55": { @@ -3008,9 +2992,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_671b4725f18a4c809b3c97a65ec9e405", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_faa61469d4bc46aebe74d5401d42c3ab", - "value": " 35.0/35.0 [00:00<00:00, 2.66kB/s]" + "value": "\u200735.0/35.0\u2007[00:00<00:00,\u20072.66kB/s]" } }, "3fabb349d1f943b09ed25784a0d0ab0a": { @@ -3333,9 +3317,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cf83a6e558f64ff98276981a82f3b2ac", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_0ef32700424c4799b8216de4ed8bbbb9", - "value": "test-00000-of-00001.parquet: 100%" + "value": "test-00000-of-00001.parquet:\u2007100%" } }, "50503005b58845b79254f89f95fb03d9": { @@ -3378,9 +3362,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d1e38bbf7593462bb87b14bacee9e3d9", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_846c1dd9b3214c68bbfc362166a6b9ee", - "value": " 7473/7473 [00:00<00:00, 5203.68 examples/s]" + "value": "\u20077473/7473\u2007[00:00<00:00,\u20075203.68\u2007examples/s]" } }, "54c603df4e174b70bd03bce59de287b2": { @@ -3491,9 +3475,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9677dc2a2f4847d89d3ca42c01435205", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_645b707de96a4c2eb07eb118db311fb6", - "value": "Generating train split: 100%" + "value": "Generating\u2007train\u2007split:\u2007100%" } }, "61d7ca5d56f14d7d93d5cf5e5b712da2": { @@ -4040,9 +4024,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_669cb00554134e8287d0daae7372d397", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_9e7bb56731134d9cba89cbde208358e0", - "value": "train-00000-of-00001.parquet: 100%" + "value": "train-00000-of-00001.parquet:\u2007100%" } }, "7c4cd321445b43bfa818b599d42c8cb7": { @@ -4061,9 +4045,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cec948b41ee348fcbdf2f5fa290ffe42", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_6d1b19657db94907b6fdf0a3b3a05e84", - "value": "README.md: 100%" + "value": "README.md:\u2007100%" } }, "7d4b49f0c54046a89039d25ee7c11f6f": { @@ -4238,9 +4222,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1da84b32592d432fb1a57358ddcdceff", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_bbcd20b75ce445bea7be9597efa68d73", - "value": " 33.4M/33.4M [00:00<00:00, 144MB/s]" + "value": "\u200733.4M/33.4M\u2007[00:00<00:00,\u2007144MB/s]" } }, "8464d2f310b045808bcd7206aa7c8cc5": { @@ -4289,9 +4273,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_70a0b21c05e642e5891bb8b91cfb2217", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_444aa81458bf4790915c188069506864", - "value": "tokenizer.model: 100%" + "value": "tokenizer.model:\u2007100%" } }, "859b75a69281413383fc1c0946bf63d0": { @@ -4341,9 +4325,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_02d6b8e9a02b470a92f700d9e7fea5d4", - 
"placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8464d2f310b045808bcd7206aa7c8cc5", - "value": "Generating test split: 100%" + "value": "Generating\u2007test\u2007split:\u2007100%" } }, "8a21cb24786049e2946c9a050a6673ab": { @@ -4414,9 +4398,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0c96fa0b7ed344f4be44632451b406d7", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_bf96f070c2f54468a674f11681fcb22d", - "value": " 2.31M/2.31M [00:00<00:00, 17.7MB/s]" + "value": "\u20072.31M/2.31M\u2007[00:00<00:00,\u200717.7MB/s]" } }, "8ca7d620dc5d415a83b50f134317d925": { @@ -4808,9 +4792,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_67eda18557dc461483821ffd44a25eb4", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_4cabdb178b97464e875d507dd1410bbf", - "value": " 7.94k/7.94k [00:00<00:00, 179kB/s]" + "value": "\u20077.94k/7.94k\u2007[00:00<00:00,\u2007179kB/s]" } }, "9e7bb56731134d9cba89cbde208358e0": { @@ -5030,9 +5014,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_def9dbef26334436a6b2298b5b153f47", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_eea838fbfd2c4e82b1769fde1036487e", - "value": " 4.69M/4.69M [00:00<00:00, 26.2MB/s]" + "value": "\u20074.69M/4.69M\u2007[00:00<00:00,\u200726.2MB/s]" } }, "a9dbcb0e164544ba8321a7916500009d": { @@ -5207,9 +5191,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_45aa2714bf0b44e9af7668c674f63863", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8ca7d620dc5d415a83b50f134317d925", - "value": " 215/215 [00:00<00:00, 13.4kB/s]" + "value": "\u2007215/215\u2007[00:00<00:00,\u200713.4kB/s]" } }, "b0c51c819fff44c5a6e8e626fec9e937": { @@ -5243,9 +5227,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d9e72108a08b447ea2a29932391fe429", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_21366789f69d431bad2ac2c8d1ec1565", - "value": " 2.00G/2.00G [00:15<00:00, 71.2MB/s]" + "value": "\u20072.00G/2.00G\u2007[00:15<00:00,\u200771.2MB/s]" } }, "b67486cb78274f2baa4f9afdc2fd7e3c": { @@ -5264,9 +5248,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_21f2aec6d7af4c3a82306ad235674829", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_fd6ff32a3aa7479e8b5d0cda47819742", - "value": "model.safetensors: 100%" + "value": "model.safetensors:\u2007100%" } }, "ba4ac2596dda42698ae048f9b7a11c61": { @@ -5845,9 +5829,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8029855dfa614963a7f5ff11d48dcbdb", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_e74fcfb683074f27806b6aaae329b74d", - "value": " 670/670 [00:00<00:00, 74.3kB/s]" + "value": "\u2007670/670\u2007[00:00<00:00,\u200774.3kB/s]" } }, "d7b48dafe15947c9b225681c4a326581": { @@ -5955,9 +5939,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a9dbcb0e164544ba8321a7916500009d", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_a6030d7b5bbd460fb5aa1356c607ec82", - "value": " 419k/419k [00:00<00:00, 9.62MB/s]" + "value": "\u2007419k/419k\u2007[00:00<00:00,\u20079.62MB/s]" } }, "def9dbef26334436a6b2298b5b153f47": { @@ -6095,9 +6079,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_25c7874a020344e7aad2869f7a27db4e", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_86e9660b2d72460c836d9bba348be56b", - "value": " 1319/1319 [00:00<00:00, 
12220.76 examples/s]" + "value": "\u20071319/1319\u2007[00:00<00:00,\u200712220.76\u2007examples/s]" } }, "ed53e921682348b28e2be40eaf96cbf7": { @@ -6140,9 +6124,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_185b378797334c819f8a199760cac945", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_a1fced067e1e40cbb08e25c9377123e0", - "value": "generation_config.json: 100%" + "value": "generation_config.json:\u2007100%" } }, "eea838fbfd2c4e82b1769fde1036487e": { @@ -6176,9 +6160,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_45eda31f22294728bbca360abad799e6", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_d7b48dafe15947c9b225681c4a326581", - "value": " 1.16M/1.16M [00:00<00:00, 6.41MB/s]" + "value": "\u20071.16M/1.16M\u2007[00:00<00:00,\u20076.41MB/s]" } }, "efaa563da24149aaaa153b7e8c473394": { @@ -6356,9 +6340,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_aaf739645f654cd6a94d897358b2c2e0", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_f0c1afb62d8a4616a8df27c9de866916", - "value": "added_tokens.json: 100%" + "value": "added_tokens.json:\u2007100%" } }, "f6a520e1570d4f7faffc1b6e2c6200d2": { @@ -6377,9 +6361,9 @@ "description": "", "description_tooltip": null, "layout": "IPY_MODEL_10c20527dc19466eb4d6eb325529a0df", - "placeholder": "​", + "placeholder": "\u200b", "style": "IPY_MODEL_8f2974213b954318902d39a1fda9b3fb", - "value": " 7473/7473 [00:00<00:00, 9263.20 examples/s]" + "value": "\u20077473/7473\u2007[00:00<00:00,\u20079263.20\u2007examples/s]" } }, "f73516b2403f411d925618cfa2af5f46": { @@ -6442,4 +6426,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file diff --git a/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb b/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb index 66a7912e..ae0dd088 100644 --- a/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb +++ b/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb @@ -1107,7 +1107,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1137,7 +1137,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb b/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb index 43327c1c..616f4587 100644 --- a/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb +++ b/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb @@ -730,7 +730,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -761,7 +761,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb b/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb index 5966dfa1..6f19ae5f 100644 --- a/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb +++ b/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb @@ -605,7 +605,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps 
= 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb b/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb index d3850047..7f7dfcdc 100644 --- a/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb +++ b/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb @@ -383,7 +383,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb b/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb index 81b4eb97..9a3dbc2e 100644 --- a/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb +++ b/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb @@ -900,7 +900,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 8, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb b/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb index cedb3bec..b96fca21 100644 --- a/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb +++ b/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb @@ -1367,7 +1367,7 @@ " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", " log_completions = False,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/Kaggle-Qwen3_(4B)-GRPO.ipynb b/nb/Kaggle-Qwen3_(4B)-GRPO.ipynb index 4b48294f..fa45a5e1 100644 --- a/nb/Kaggle-Qwen3_(4B)-GRPO.ipynb +++ b/nb/Kaggle-Qwen3_(4B)-GRPO.ipynb @@ -2605,7 +2605,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -2647,7 +2647,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb b/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb index 4576f4a3..3f63ddf9 100644 --- a/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb +++ b/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb @@ -1269,7 +1269,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1299,7 +1299,7 @@ " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", " log_completions = False,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb b/nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb index 61f365f2..389da9c2 100644 --- a/nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb 
+++ b/nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb @@ -1682,7 +1682,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb b/nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb index c4f6d29d..20a2202e 100644 --- a/nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb +++ b/nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb @@ -1810,7 +1810,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Llama3.1_(8B)-GRPO.ipynb b/nb/Llama3.1_(8B)-GRPO.ipynb index 7f3bd69e..207d094e 100644 --- a/nb/Llama3.1_(8B)-GRPO.ipynb +++ b/nb/Llama3.1_(8B)-GRPO.ipynb @@ -737,7 +737,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -768,7 +768,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Mistral_v0.3_(7B)-GRPO.ipynb b/nb/Mistral_v0.3_(7B)-GRPO.ipynb index 5ccdbe0c..72020966 100644 --- a/nb/Mistral_v0.3_(7B)-GRPO.ipynb +++ b/nb/Mistral_v0.3_(7B)-GRPO.ipynb @@ -612,7 +612,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/Phi_4_(14B)-GRPO.ipynb b/nb/Phi_4_(14B)-GRPO.ipynb index 165b1051..5e8ba2d8 100644 --- a/nb/Phi_4_(14B)-GRPO.ipynb +++ b/nb/Phi_4_(14B)-GRPO.ipynb @@ -390,7 +390,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/nb/Qwen2.5_(3B)-GRPO.ipynb b/nb/Qwen2.5_(3B)-GRPO.ipynb index 0bb2bf48..432a36ae 100644 --- a/nb/Qwen2.5_(3B)-GRPO.ipynb +++ b/nb/Qwen2.5_(3B)-GRPO.ipynb @@ -907,7 +907,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 8, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/nb/Qwen2_5_7B_VL_GRPO.ipynb b/nb/Qwen2_5_7B_VL_GRPO.ipynb index 5fc1c362..458088b4 100644 --- a/nb/Qwen2_5_7B_VL_GRPO.ipynb +++ b/nb/Qwen2_5_7B_VL_GRPO.ipynb @@ -1374,7 +1374,7 @@ " optim = 
\"adamw_8bit\",\n", " logging_steps = 1,\n", " log_completions = False,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/nb/Qwen3_(4B)-GRPO.ipynb b/nb/Qwen3_(4B)-GRPO.ipynb index 9de7e7ce..96c6a87b 100644 --- a/nb/Qwen3_(4B)-GRPO.ipynb +++ b/nb/Qwen3_(4B)-GRPO.ipynb @@ -2612,7 +2612,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -2654,7 +2654,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/gpt-oss-(20B)-GRPO.ipynb b/nb/gpt-oss-(20B)-GRPO.ipynb index 61f365f2..389da9c2 100644 --- a/nb/gpt-oss-(20B)-GRPO.ipynb +++ b/nb/gpt-oss-(20B)-GRPO.ipynb @@ -1682,7 +1682,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/nb/gpt_oss_(20B)_GRPO_BF16.ipynb b/nb/gpt_oss_(20B)_GRPO_BF16.ipynb index c4f6d29d..20a2202e 100644 --- a/nb/gpt_oss_(20B)_GRPO_BF16.ipynb +++ b/nb/gpt_oss_(20B)_GRPO_BF16.ipynb @@ -1810,7 +1810,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb b/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb index 12f5a140..d7d195a0 100644 --- a/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb +++ b/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb @@ -796,7 +796,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_torch_fused\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 4, # Increase to 4 for smoother training\n", " num_generations = 8, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb index b5016295..c7d710a7 100644 --- a/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb +++ b/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb @@ -3,8 +3,8 @@ { "cell_type": "markdown", "metadata": { - "id": "view-in-github", - "colab_type": "text" + "colab_type": "text", + "id": "view-in-github" }, "source": [ "\"Open" @@ -1278,7 +1278,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 4, # Increase to 4 for smoother training\n", " num_generations = 
4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", @@ -12766,8 +12766,8 @@ "accelerator": "GPU", "colab": { "gpuType": "T4", - "provenance": [], - "include_colab_link": true + "include_colab_link": true, + "provenance": [] }, "kernelspec": { "display_name": "Python 3", diff --git a/original_template/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb b/original_template/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb index 0f36aad2..4abe825b 100644 --- a/original_template/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb +++ b/original_template/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb @@ -1690,7 +1690,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/Gemma3_(1B)-GRPO.ipynb b/original_template/Gemma3_(1B)-GRPO.ipynb index ee0615ba..a891f962 100644 --- a/original_template/Gemma3_(1B)-GRPO.ipynb +++ b/original_template/Gemma3_(1B)-GRPO.ipynb @@ -1011,7 +1011,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1042,7 +1042,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_torch_fused\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/Gemma3_(4B)-Vision-GRPO.ipynb b/original_template/Gemma3_(4B)-Vision-GRPO.ipynb index 8cc14868..1b7170e5 100644 --- a/original_template/Gemma3_(4B)-Vision-GRPO.ipynb +++ b/original_template/Gemma3_(4B)-Vision-GRPO.ipynb @@ -1093,7 +1093,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1123,7 +1123,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/original_template/Llama3.1_(8B)-GRPO.ipynb b/original_template/Llama3.1_(8B)-GRPO.ipynb index 38b2317a..fdb6354c 100644 --- a/original_template/Llama3.1_(8B)-GRPO.ipynb +++ b/original_template/Llama3.1_(8B)-GRPO.ipynb @@ -716,7 +716,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -747,7 +747,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/Mistral_v0.3_(7B)-GRPO.ipynb b/original_template/Mistral_v0.3_(7B)-GRPO.ipynb index e8e8d23a..5d987984 100644 --- a/original_template/Mistral_v0.3_(7B)-GRPO.ipynb +++ b/original_template/Mistral_v0.3_(7B)-GRPO.ipynb @@ -591,7 +591,7 @@ " 
lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/Phi_4_(14B)-GRPO.ipynb b/original_template/Phi_4_(14B)-GRPO.ipynb index 0346fb2d..1ebf2f03 100644 --- a/original_template/Phi_4_(14B)-GRPO.ipynb +++ b/original_template/Phi_4_(14B)-GRPO.ipynb @@ -369,7 +369,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"paged_adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 6,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 6, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/original_template/Qwen2.5_(3B)-GRPO.ipynb b/original_template/Qwen2.5_(3B)-GRPO.ipynb index a1c58094..24615a43 100644 --- a/original_template/Qwen2.5_(3B)-GRPO.ipynb +++ b/original_template/Qwen2.5_(3B)-GRPO.ipynb @@ -886,7 +886,7 @@ " lr_scheduler_type = \"cosine\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 8,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 8, # Decrease if out of memory\n", " max_prompt_length = 256,\n", diff --git a/original_template/Qwen2_5_7B_VL_GRPO.ipynb b/original_template/Qwen2_5_7B_VL_GRPO.ipynb index 812e1d4d..de12d0c3 100644 --- a/original_template/Qwen2_5_7B_VL_GRPO.ipynb +++ b/original_template/Qwen2_5_7B_VL_GRPO.ipynb @@ -1353,7 +1353,7 @@ " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", " log_completions = False,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 2, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/original_template/Qwen3_(4B)-GRPO.ipynb b/original_template/Qwen3_(4B)-GRPO.ipynb index 2971f497..0b469467 100644 --- a/original_template/Qwen3_(4B)-GRPO.ipynb +++ b/original_template/Qwen3_(4B)-GRPO.ipynb @@ -2591,7 +2591,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -2633,7 +2633,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 4,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 4, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb b/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb index 67aac49d..036ca8a5 100644 --- a/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb +++ b/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb @@ -1255,7 +1255,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -1285,7 +1285,7 @@ " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", " log_completions = False,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother 
training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = 1024,\n", diff --git a/original_template/gpt-oss-(20B)-GRPO.ipynb b/original_template/gpt-oss-(20B)-GRPO.ipynb index 1ed932d1..6abe9a59 100644 --- a/original_template/gpt-oss-(20B)-GRPO.ipynb +++ b/original_template/gpt-oss-(20B)-GRPO.ipynb @@ -1659,7 +1659,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/original_template/gpt_oss_(20B)_GRPO_BF16.ipynb b/original_template/gpt_oss_(20B)_GRPO_BF16.ipynb index 226aeae0..b7da0f96 100644 --- a/original_template/gpt_oss_(20B)_GRPO_BF16.ipynb +++ b/original_template/gpt_oss_(20B)_GRPO_BF16.ipynb @@ -1787,7 +1787,7 @@ " lr_scheduler_type = \"linear\",\n", " optim = \"adamw_8bit\",\n", " logging_steps = 1,\n", - " per_device_train_batch_size = 1,\n", + " per_device_train_batch_size = 2,\n", " gradient_accumulation_steps = 1, # Increase to 4 for smoother training\n", " num_generations = 2, # Decrease if out of memory\n", " max_prompt_length = max_prompt_length,\n", diff --git a/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py b/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py index 0ee01230..371939f2 100644 --- a/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py +++ b/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py @@ -355,7 +355,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 4, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py b/python_scripts/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py index 658cf301..cd6e678d 100644 --- a/python_scripts/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py +++ b/python_scripts/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py @@ -488,7 +488,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Gemma3_(1B)-GRPO.py b/python_scripts/Gemma3_(1B)-GRPO.py index b67873dc..bfc7dab3 100644 --- a/python_scripts/Gemma3_(1B)-GRPO.py +++ b/python_scripts/Gemma3_(1B)-GRPO.py @@ -335,7 +335,7 @@ def check_numbers(prompts, completions, answer, **kwargs): # # Now set up GRPO Trainer and all configurations! 
-# In[18]: +# In[ ]: max_prompt_length = 256 @@ -350,7 +350,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_torch_fused", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Gemma3_(4B)-Vision-GRPO.py b/python_scripts/Gemma3_(4B)-Vision-GRPO.py index c4bb5e0e..3140acd5 100644 --- a/python_scripts/Gemma3_(4B)-Vision-GRPO.py +++ b/python_scripts/Gemma3_(4B)-Vision-GRPO.py @@ -297,7 +297,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # # Now set up GRPO Trainer and all configurations! -# In[14]: +# In[ ]: from trl import GRPOConfig, GRPOTrainer @@ -311,7 +311,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 2, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py b/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py index cae46edc..3f071048 100644 --- a/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py +++ b/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py @@ -354,7 +354,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_torch_fused", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 8, gradient_accumulation_steps = 4, # Increase to 4 for smoother training num_generations = 8, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py b/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py index 0ee01230..371939f2 100644 --- a/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py +++ b/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py @@ -355,7 +355,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 4, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py b/python_scripts/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py index c4f3ca82..9edaaac3 100644 --- a/python_scripts/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py +++ b/python_scripts/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py @@ -490,7 +490,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py b/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py index 
d99f852f..d319788c 100644 --- a/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py @@ -337,7 +337,7 @@ def check_numbers(prompts, completions, answer, **kwargs): # # Now set up GRPO Trainer and all configurations! -# In[18]: +# In[ ]: max_prompt_length = 256 @@ -352,7 +352,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_torch_fused", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py b/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py index fb3d2d2b..d692fae9 100644 --- a/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py +++ b/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py @@ -299,7 +299,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # # Now set up GRPO Trainer and all configurations! -# In[14]: +# In[ ]: from trl import GRPOConfig, GRPOTrainer @@ -313,7 +313,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 2, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py b/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py index 21ac7970..4ccf75a7 100644 --- a/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py @@ -199,7 +199,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # # Now set up GRPO Trainer and all configurations! 
-# In[4]: +# In[ ]: max_prompt_length = 256 @@ -214,7 +214,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py b/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py index f6636b0a..cdcec7a7 100644 --- a/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py @@ -214,7 +214,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py b/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py index 239c98a8..634ab004 100644 --- a/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py @@ -210,7 +210,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = 256, diff --git a/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py b/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py index 6a3dab51..d50aa768 100644 --- a/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py @@ -213,7 +213,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 8, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 8, # Decrease if out of memory max_prompt_length = 256, diff --git a/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py b/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py index 01409e4a..6cf223bc 100644 --- a/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py +++ b/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py @@ -326,7 +326,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa optim = "adamw_8bit", logging_steps = 1, log_completions = False, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 2, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/HuggingFace Course-Qwen3_(4B)-GRPO.py b/python_scripts/HuggingFace Course-Qwen3_(4B)-GRPO.py index 9be4441d..5cf68e51 100644 --- a/python_scripts/HuggingFace Course-Qwen3_(4B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Qwen3_(4B)-GRPO.py @@ -572,7 +572,7 @@ def check_numbers(prompts, completions, answer, **kwargs): # # Now set up GRPO Trainer and all configurations! 
-# In[30]: +# In[ ]: max_prompt_length = maximum_length + 1 # + 1 just in case! @@ -598,7 +598,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py b/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py index 9977919f..c9288042 100644 --- a/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py +++ b/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py @@ -320,7 +320,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # # Now set up the `GRPO` Trainer and all configurations! Note we actually enable `GSPO` as well! -# In[18]: +# In[ ]: from trl import GRPOConfig, GRPOTrainer @@ -334,7 +334,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa optim = "adamw_8bit", logging_steps = 1, log_completions = False, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/HuggingFace Course-gpt-oss-(20B)-GRPO.py b/python_scripts/HuggingFace Course-gpt-oss-(20B)-GRPO.py index 79ead469..0c62f87e 100644 --- a/python_scripts/HuggingFace Course-gpt-oss-(20B)-GRPO.py +++ b/python_scripts/HuggingFace Course-gpt-oss-(20B)-GRPO.py @@ -687,7 +687,7 @@ def speed_check(completions, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.py b/python_scripts/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.py index 28596c24..3a621c17 100644 --- a/python_scripts/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.py +++ b/python_scripts/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.py @@ -687,7 +687,7 @@ def speed_check(completions, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py b/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py index 979e2167..30ff6665 100644 --- a/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py +++ b/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py @@ -330,7 +330,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_torch_fused", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 8, gradient_accumulation_steps = 4, # Increase to 4 for smoother training num_generations = 8, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py b/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py index 813d9a00..58041e99 
100644 --- a/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py +++ b/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py @@ -333,7 +333,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 4, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py b/python_scripts/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py index b9c828a7..25b1e126 100644 --- a/python_scripts/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py +++ b/python_scripts/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py @@ -466,7 +466,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py b/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py index c27a4e67..f6d7e968 100644 --- a/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py +++ b/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py @@ -313,7 +313,7 @@ def check_numbers(prompts, completions, answer, **kwargs): # # Now set up GRPO Trainer and all configurations! -# In[18]: +# In[ ]: max_prompt_length = 256 @@ -328,7 +328,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "cosine", optim = "adamw_torch_fused", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py b/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py index aa8a42c5..84d6e61c 100644 --- a/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py +++ b/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py @@ -275,7 +275,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # # Now set up GRPO Trainer and all configurations! -# In[14]: +# In[ ]: from trl import GRPOConfig, GRPOTrainer @@ -289,7 +289,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 2, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py b/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py index e3de3741..4c876433 100644 --- a/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py +++ b/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py @@ -175,7 +175,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # # Now set up GRPO Trainer and all configurations! 
-# In[4]: +# In[ ]: max_prompt_length = 256 @@ -190,7 +190,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py b/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py index 09f746f0..d7043bbb 100644 --- a/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py +++ b/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py @@ -190,7 +190,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py b/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py index 964b7e22..11a4d3c3 100644 --- a/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py +++ b/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py @@ -186,7 +186,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = 256, diff --git a/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py b/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py index 201fb29e..17dbcc60 100644 --- a/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py +++ b/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py @@ -189,7 +189,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 8, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 8, # Decrease if out of memory max_prompt_length = 256, diff --git a/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py b/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py index 73870f7f..0808d099 100644 --- a/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py +++ b/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py @@ -302,7 +302,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa optim = "adamw_8bit", logging_steps = 1, log_completions = False, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 2, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/Kaggle-Qwen3_(4B)-GRPO.py b/python_scripts/Kaggle-Qwen3_(4B)-GRPO.py index 233619af..c1ae49d9 100644 --- a/python_scripts/Kaggle-Qwen3_(4B)-GRPO.py +++ b/python_scripts/Kaggle-Qwen3_(4B)-GRPO.py @@ -548,7 +548,7 @@ def check_numbers(prompts, completions, answer, **kwargs): # # Now set up GRPO Trainer and all configurations! -# In[30]: +# In[ ]: max_prompt_length = maximum_length + 1 # + 1 just in case! 
@@ -574,7 +574,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py b/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py index 4e33ce96..85f8ec7f 100644 --- a/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py +++ b/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py @@ -296,7 +296,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # # Now set up the `GRPO` Trainer and all configurations! Note we actually enable `GSPO` as well! -# In[18]: +# In[ ]: from trl import GRPOConfig, GRPOTrainer @@ -310,7 +310,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa optim = "adamw_8bit", logging_steps = 1, log_completions = False, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py b/python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py index 5d609c71..6100ebd0 100644 --- a/python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py +++ b/python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py @@ -685,7 +685,7 @@ def speed_check(completions, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Kaggle-gpt_oss_(20B)_GRPO_BF16.py b/python_scripts/Kaggle-gpt_oss_(20B)_GRPO_BF16.py index 27c56aec..c970b081 100644 --- a/python_scripts/Kaggle-gpt_oss_(20B)_GRPO_BF16.py +++ b/python_scripts/Kaggle-gpt_oss_(20B)_GRPO_BF16.py @@ -685,7 +685,7 @@ def speed_check(completions, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Llama3.1_(8B)-GRPO.py b/python_scripts/Llama3.1_(8B)-GRPO.py index 02b47f29..8704f763 100644 --- a/python_scripts/Llama3.1_(8B)-GRPO.py +++ b/python_scripts/Llama3.1_(8B)-GRPO.py @@ -197,7 +197,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # # Now set up GRPO Trainer and all configurations! 
-# In[4]: +# In[ ]: max_prompt_length = 256 @@ -212,7 +212,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Mistral_v0.3_(7B)-GRPO.py b/python_scripts/Mistral_v0.3_(7B)-GRPO.py index b5630f32..28a6a5a7 100644 --- a/python_scripts/Mistral_v0.3_(7B)-GRPO.py +++ b/python_scripts/Mistral_v0.3_(7B)-GRPO.py @@ -212,7 +212,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/Phi_4_(14B)-GRPO.py b/python_scripts/Phi_4_(14B)-GRPO.py index 605e0b5a..fbaa60ef 100644 --- a/python_scripts/Phi_4_(14B)-GRPO.py +++ b/python_scripts/Phi_4_(14B)-GRPO.py @@ -208,7 +208,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "paged_adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 6, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 6, # Decrease if out of memory max_prompt_length = 256, diff --git a/python_scripts/Qwen2.5_(3B)-GRPO.py b/python_scripts/Qwen2.5_(3B)-GRPO.py index b281f211..b68637ea 100644 --- a/python_scripts/Qwen2.5_(3B)-GRPO.py +++ b/python_scripts/Qwen2.5_(3B)-GRPO.py @@ -211,7 +211,7 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: lr_scheduler_type = "cosine", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 8, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 8, # Decrease if out of memory max_prompt_length = 256, diff --git a/python_scripts/Qwen2_5_7B_VL_GRPO.py b/python_scripts/Qwen2_5_7B_VL_GRPO.py index bc38b951..84212ef8 100644 --- a/python_scripts/Qwen2_5_7B_VL_GRPO.py +++ b/python_scripts/Qwen2_5_7B_VL_GRPO.py @@ -324,7 +324,7 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa optim = "adamw_8bit", logging_steps = 1, log_completions = False, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 2, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = 1024, diff --git a/python_scripts/Qwen3_(4B)-GRPO.py b/python_scripts/Qwen3_(4B)-GRPO.py index 04a0e2a8..e53bbf7b 100644 --- a/python_scripts/Qwen3_(4B)-GRPO.py +++ b/python_scripts/Qwen3_(4B)-GRPO.py @@ -570,7 +570,7 @@ def check_numbers(prompts, completions, answer, **kwargs): # # Now set up GRPO Trainer and all configurations! -# In[30]: +# In[ ]: max_prompt_length = maximum_length + 1 # + 1 just in case! 
@@ -596,7 +596,7 @@ def check_numbers(prompts, completions, answer, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 4, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 4, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/gpt-oss-(20B)-GRPO.py b/python_scripts/gpt-oss-(20B)-GRPO.py index 5d609c71..6100ebd0 100644 --- a/python_scripts/gpt-oss-(20B)-GRPO.py +++ b/python_scripts/gpt-oss-(20B)-GRPO.py @@ -685,7 +685,7 @@ def speed_check(completions, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = max_prompt_length, diff --git a/python_scripts/gpt_oss_(20B)_GRPO_BF16.py b/python_scripts/gpt_oss_(20B)_GRPO_BF16.py index 27c56aec..c970b081 100644 --- a/python_scripts/gpt_oss_(20B)_GRPO_BF16.py +++ b/python_scripts/gpt_oss_(20B)_GRPO_BF16.py @@ -685,7 +685,7 @@ def speed_check(completions, **kwargs): lr_scheduler_type = "linear", optim = "adamw_8bit", logging_steps = 1, - per_device_train_batch_size = 1, + per_device_train_batch_size = 2, gradient_accumulation_steps = 1, # Increase to 4 for smoother training num_generations = 2, # Decrease if out of memory max_prompt_length = max_prompt_length,
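--
Reviewer note on the recurring change in this patch. Every hunk raises
per_device_train_batch_size from 1 to the cell's num_generations. The motivation,
to my reading of recent TRL (these notebooks pin trl==0.22.2), is that GRPOTrainer
scores completions in groups of num_generations per prompt, and the batch those
completions are generated from (per_device_train_batch_size * num_processes *
steps_per_generation, where steps_per_generation defaults to
gradient_accumulation_steps) must split evenly into such groups. With a per-device
batch of 1 and no gradient accumulation, a group of 4 or 6 generations cannot fit
and the trainer rejects the config; setting the batch size equal to num_generations
is the smallest change that restores the invariant on a single GPU. Below is a
minimal sketch of that divisibility check; check_grpo_batching is a hypothetical
helper written for illustration, not a TRL API, and the exact rule should be
verified against the pinned trl release.

# A minimal sketch, assuming a single-GPU run and the TRL batching rule
# described above (an assumption, not a quote of TRL source).
def check_grpo_batching(per_device_train_batch_size, gradient_accumulation_steps,
                        num_generations, num_processes = 1):
    # Size of the batch completions are generated in, before being split into
    # per-prompt groups of `num_generations` for advantage normalization.
    generation_batch_size = (per_device_train_batch_size * num_processes
                             * gradient_accumulation_steps)
    if generation_batch_size % num_generations != 0:
        raise ValueError(f"{generation_batch_size} completions cannot be split "
                         f"into groups of num_generations = {num_generations}")

check_grpo_batching(4, 1, 4)   # new Gemma3 (1B) settings: one full group per step
check_grpo_batching(6, 1, 6)   # new Llama 3.1 (8B) settings: likewise fine
# check_grpo_batching(1, 1, 4) # old settings: raises ValueError (1 % 4 != 0)

Note the configs that already used gradient_accumulation_steps = 4 (the Advanced
Llama 3.2 LoRA notebooks) satisfied the divisibility rule even at batch size 1;
there the change simply makes each device process one whole generation group per
micro-step instead of spreading a group across accumulation steps.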