129 changes: 73 additions & 56 deletions nb/Kaggle-Orpheus_(3B)-TTS.ipynb
@@ -8,7 +8,7 @@
"<div class=\"align-center\">\n",
"<a href=\"https://unsloth.ai/\"><img src=\"https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png\" width=\"115\"></a>\n",
"<a href=\"https://discord.gg/unsloth\"><img src=\"https://github.com/unslothai/unsloth/raw/main/images/Discord button.png\" width=\"145\"></a>\n",
"<a href=\"https://docs.unsloth.ai/\"><img src=\"https://github.com/unslothai/unsloth/blob/main/images/documentation%20green%20button.png?raw=true\" width=\"125\"></a></a> Join Discord if you need help + \u2b50 <i>Star us on <a href=\"https://github.com/unslothai/unsloth\">Github</a> </i> \u2b50\n",
"<a href=\"https://docs.unsloth.ai/\"><img src=\"https://github.com/unslothai/unsloth/blob/main/images/documentation%20green%20button.png?raw=true\" width=\"125\"></a></a> Join Discord if you need help + <i>Star us on <a href=\"https://github.com/unslothai/unsloth\">Github</a> </i> \n",
"</div>\n",
"\n",
"To install Unsloth on your own computer, follow the installation instructions on our Github page [here](https://docs.unsloth.ai/get-started/installing-+-updating).\n",
@@ -46,7 +46,18 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "%%capture\nimport os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n\n!pip install pip3-autoremove\n!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu124\n!pip install unsloth\n!pip install --upgrade transformers==4.53.2 \"huggingface_hub>=0.34.0\" \"datasets>=3.4.1,<4.0.0\"\n\n!pip install snac"
"source": [
"%%capture\n",
"import os\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
"\n",
"!pip install pip3-autoremove\n",
"!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu124\n",
"!pip install unsloth\n",
"!pip install --upgrade transformers==4.53.2 \"huggingface_hub>=0.34.0\" \"datasets>=3.4.1,<4.0.0\"\n",
"\n",
"!pip install snac"
]
},
{
"cell_type": "markdown",
@@ -195,6 +206,29 @@
"dataset = load_dataset(\"MrDragonFox/Elise\", split = \"train\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tokeniser_length = 128256\n",
"start_of_text = 128000\n",
"end_of_text = 128009\n",
"\n",
"start_of_speech = tokeniser_length + 1\n",
"end_of_speech = tokeniser_length + 2\n",
"\n",
"start_of_human = tokeniser_length + 3\n",
"end_of_human = tokeniser_length + 4\n",
"\n",
"start_of_ai = tokeniser_length + 5\n",
"end_of_ai = tokeniser_length + 6\n",
"pad_token = tokeniser_length + 7\n",
"\n",
"audio_tokens_start = tokeniser_length + 10"
]
},
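Note: this new cell moves the special-token definitions ahead of their first use. With the Llama-3 tokeniser vocabulary size of 128256, the named constants resolve to exactly the magic numbers removed later in this diff — a minimal sanity-check sketch, not part of the notebook:

```python
# Sanity check: the named constants reproduce the hardcoded IDs they replace.
tokeniser_length = 128256

assert tokeniser_length + 1 == 128257    # start_of_speech
assert tokeniser_length + 2 == 128258    # end_of_speech
assert tokeniser_length + 3 == 128259    # start_of_human
assert tokeniser_length + 4 == 128260    # end_of_human
assert tokeniser_length + 7 == 128263    # pad_token
assert tokeniser_length + 10 == 128266   # audio_tokens_start
```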
{
"cell_type": "code",
"execution_count": null,
@@ -383,13 +417,13 @@
"\n",
" all_codes = []\n",
" for i in range(codes[0].shape[1]):\n",
" all_codes.append(codes[0][0][i].item()+128266)\n",
" all_codes.append(codes[1][0][2*i].item()+128266+4096)\n",
" all_codes.append(codes[2][0][4*i].item()+128266+(2*4096))\n",
" all_codes.append(codes[2][0][(4*i)+1].item()+128266+(3*4096))\n",
" all_codes.append(codes[1][0][(2*i)+1].item()+128266+(4*4096))\n",
" all_codes.append(codes[2][0][(4*i)+2].item()+128266+(5*4096))\n",
" all_codes.append(codes[2][0][(4*i)+3].item()+128266+(6*4096))\n",
" all_codes.append(codes[0][0][i].item()+audio_tokens_start)\n",
" all_codes.append(codes[1][0][2*i].item()+audio_tokens_start+4096)\n",
" all_codes.append(codes[2][0][4*i].item()+audio_tokens_start+(2*4096))\n",
" all_codes.append(codes[2][0][(4*i)+1].item()+audio_tokens_start+(3*4096))\n",
" all_codes.append(codes[1][0][(2*i)+1].item()+audio_tokens_start+(4*4096))\n",
" all_codes.append(codes[2][0][(4*i)+2].item()+audio_tokens_start+(5*4096))\n",
" all_codes.append(codes[2][0][(4*i)+3].item()+audio_tokens_start+(6*4096))\n",
"\n",
"\n",
" return all_codes\n",
@@ -412,23 +446,6 @@
" return example\n",
"\n",
"dataset = dataset.map(add_codes, remove_columns=[\"audio\"])\n",
"\n",
"tokeniser_length = 128256\n",
"start_of_text = 128000\n",
"end_of_text = 128009\n",
"\n",
"start_of_speech = tokeniser_length + 1\n",
"end_of_speech = tokeniser_length + 2\n",
"\n",
"start_of_human = tokeniser_length + 3\n",
"end_of_human = tokeniser_length + 4\n",
"\n",
"start_of_ai = tokeniser_length + 5\n",
"end_of_ai = tokeniser_length + 6\n",
"pad_token = tokeniser_length + 7\n",
"\n",
"audio_tokens_start = tokeniser_length + 10\n",
"\n",
"dataset = dataset.filter(lambda x: x[\"codes_list\"] is not None)\n",
"dataset = dataset.filter(lambda x: len(x[\"codes_list\"]) > 0)\n",
"\n",
@@ -1946,8 +1963,8 @@
" input_ids = tokenizer(prompt, return_tensors=\"pt\").input_ids\n",
" all_input_ids.append(input_ids)\n",
"\n",
"start_token = torch.tensor([[ 128259]], dtype=torch.int64) # Start of human\n",
"end_tokens = torch.tensor([[128009, 128260]], dtype=torch.int64) # End of text, End of human\n",
"start_token = torch.tensor([[ start_of_human]], dtype=torch.int64) # Start of human\n",
"end_tokens = torch.tensor([[end_of_text, end_of_human]], dtype=torch.int64) # End of text, End of human\n",
"\n",
"all_modified_input_ids = []\n",
"for input_ids in all_input_ids:\n",
@@ -1959,7 +1976,7 @@
"max_length = max([modified_input_ids.shape[1] for modified_input_ids in all_modified_input_ids])\n",
"for modified_input_ids in all_modified_input_ids:\n",
" padding = max_length - modified_input_ids.shape[1]\n",
" padded_tensor = torch.cat([torch.full((1, padding), 128263, dtype=torch.int64), modified_input_ids], dim=1)\n",
" padded_tensor = torch.cat([torch.full((1, padding), pad_token, dtype=torch.int64), modified_input_ids], dim=1)\n",
" attention_mask = torch.cat([torch.zeros((1, padding), dtype=torch.int64), torch.ones((1, modified_input_ids.shape[1]), dtype=torch.int64)], dim=1)\n",
" all_padded_tensors.append(padded_tensor)\n",
" all_attention_masks.append(attention_mask)\n",
@@ -1978,11 +1995,11 @@
" top_p=0.95,\n",
" repetition_penalty=1.1,\n",
" num_return_sequences=1,\n",
" eos_token_id=128258,\n",
" eos_token_id=end_of_speech,\n",
" use_cache = True\n",
" )\n",
"token_to_find = 128257\n",
"token_to_remove = 128258\n",
"token_to_find = start_of_speech\n",
"token_to_remove = end_of_speech\n",
"\n",
"token_indices = (generated_ids == token_to_find).nonzero(as_tuple=True)\n",
"\n",
@@ -2006,7 +2023,7 @@
" row_length = row.size(0)\n",
" new_length = (row_length // 7) * 7\n",
" trimmed_row = row[:new_length]\n",
" trimmed_row = [t - 128266 for t in trimmed_row]\n",
" trimmed_row = [t - audio_tokens_start for t in trimmed_row]\n",
" code_lists.append(trimmed_row)\n",
"\n",
"\n",
@@ -2137,7 +2154,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 28/28 [00:01<00:00, 27.83it/s]\n"
"100%|██████████| 28/28 [00:01<00:00, 27.83it/s]\n"
]
},
{
@@ -2186,7 +2203,7 @@
" <a href=\"https://discord.gg/unsloth\"><img src=\"https://github.com/unslothai/unsloth/raw/main/images/Discord.png\" width=\"145\"></a>\n",
" <a href=\"https://docs.unsloth.ai/\"><img src=\"https://github.com/unslothai/unsloth/blob/main/images/documentation%20green%20button.png?raw=true\" width=\"125\"></a>\n",
"\n",
" Join Discord if you need help + \u2b50\ufe0f <i>Star us on <a href=\"https://github.com/unslothai/unsloth\">Github</a> </i> \u2b50\ufe0f\n",
" Join Discord if you need help + ⭐️ <i>Star us on <a href=\"https://github.com/unslothai/unsloth\">Github</a> </i> ⭐️\n",
"</div>\n"
]
}
@@ -2257,9 +2274,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_4145e0f0ee3e45eaa45706496c45ad92",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_de11a736d05e463fa25eb5c134a33a42",
"value": "Filter:\u2007100%"
"value": "Filter: 100%"
}
},
"09a90177afbf47dfa75d09e562f78ef1": {
@@ -2370,9 +2387,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_1104a11ea07348c09d9d6ea18a7a0c4f",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_c86528d68fb642c6b349e1f72309e9c2",
"value": "Filter:\u2007100%"
"value": "Filter: 100%"
}
},
"1104a11ea07348c09d9d6ea18a7a0c4f": {
@@ -2864,9 +2881,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_1c99dbc970074f9788d87847019699d4",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_75dd059f3d13436b9fa15a26c3d56b61",
"value": "\u20071195/1195\u2007[00:56&lt;00:00,\u200722.63\u2007examples/s]"
"value": " 1195/1195[00:56&lt;00:00, 22.63 examples/s]"
}
},
"54560d7f982a4af0b680c276cb74e8de": {
@@ -3011,9 +3028,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3deacc4ba8a540ef84bdf83d63c5446e",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_111b14c261a4468daa3f68b8dbc79629",
"value": "\u20071195/1195\u2007[00:00&lt;00:00,\u20072586.53\u2007examples/s]"
"value": " 1195/1195[00:00&lt;00:00, 2586.53 examples/s]"
}
},
"5c052294982d4da39950473ab7fd832f": {
@@ -3032,9 +3049,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_09a90177afbf47dfa75d09e562f78ef1",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_07fb4f7f1aab42f4a0d184ee2c5644ec",
"value": "\u20071195/1195\u2007[00:01&lt;00:00,\u2007988.84\u2007examples/s]"
"value": " 1195/1195[00:01&lt;00:00, 988.84 examples/s]"
}
},
"6a92469e5ff545e09ca55265ae57cb59": {
@@ -3210,9 +3227,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_54560d7f982a4af0b680c276cb74e8de",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_6a92469e5ff545e09ca55265ae57cb59",
"value": "\u20071195/1195\u2007[00:00&lt;00:00,\u20072636.42\u2007examples/s]"
"value": " 1195/1195[00:00&lt;00:00, 2636.42 examples/s]"
}
},
"8937c2575e9f48cb8498dc6554889a77": {
@@ -3387,9 +3404,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f04c54b9ac7e4af1b94276c109ed349e",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_bf9ea54f70f6427cbfed6e58a6463583",
"value": "Map:\u2007100%"
"value": "Map: 100%"
}
},
"a531278935a54a398db4f296ee88dca3": {
@@ -3424,9 +3441,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_6f85d840cc744c268da6e7470ffe8a58",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_1db03134e899468bab57cad43f33bb6f",
"value": "\u20071195/1195\u2007[00:01&lt;00:00,\u2007692.61\u2007examples/s]"
"value": " 1195/1195[00:01&lt;00:00, 692.61 examples/s]"
}
},
"a8cde83e4c224ff5a519cbe9e850426e": {
@@ -3784,9 +3801,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_eb257008691346d3a48ca7a694a6d9fe",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_6ac48a8dfb674c7090c8831841c3bd4d",
"value": "Map:\u2007100%"
"value": "Map: 100%"
}
},
"f5199871f0dd40c498abfb9ecec75484": {
@@ -3805,9 +3822,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_5638ccc92d1342b2a63a3945b957a762",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_3d2aa872ce1a41fd81cdce2bd75a946d",
"value": "Map:\u2007100%"
"value": "Map: 100%"
}
},
"fe99495940c24e709e79d4af9991a2df": {
Expand Down Expand Up @@ -3942,4 +3959,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
}
}