129 changes: 73 additions & 56 deletions nb/Kaggle-Orpheus_(3B)-TTS.ipynb
@@ -8,7 +8,7 @@
"<div class=\"align-center\">\n",
"<a href=\"https://unsloth.ai/\"><img src=\"https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png\" width=\"115\"></a>\n",
"<a href=\"https://discord.gg/unsloth\"><img src=\"https://github.com/unslothai/unsloth/raw/main/images/Discord button.png\" width=\"145\"></a>\n",
"<a href=\"https://docs.unsloth.ai/\"><img src=\"https://github.com/unslothai/unsloth/blob/main/images/documentation%20green%20button.png?raw=true\" width=\"125\"></a></a> Join Discord if you need help + \u2b50 <i>Star us on <a href=\"https://github.com/unslothai/unsloth\">Github</a> </i> \u2b50\n",
"<a href=\"https://docs.unsloth.ai/\"><img src=\"https://github.com/unslothai/unsloth/blob/main/images/documentation%20green%20button.png?raw=true\" width=\"125\"></a></a> Join Discord if you need help + <i>Star us on <a href=\"https://github.com/unslothai/unsloth\">Github</a> </i> \n",
"</div>\n",
"\n",
"To install Unsloth on your own computer, follow the installation instructions on our Github page [here](https://docs.unsloth.ai/get-started/installing-+-updating).\n",
@@ -46,7 +46,18 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "%%capture\nimport os\nos.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n\n!pip install pip3-autoremove\n!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu124\n!pip install unsloth\n!pip install --upgrade transformers==4.53.2 \"huggingface_hub>=0.34.0\" \"datasets>=3.4.1,<4.0.0\"\n\n!pip install snac"
"source": [
"%%capture\n",
"import os\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
"\n",
"!pip install pip3-autoremove\n",
"!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu124\n",
"!pip install unsloth\n",
"!pip install --upgrade transformers==4.53.2 \"huggingface_hub>=0.34.0\" \"datasets>=3.4.1,<4.0.0\"\n",
"\n",
"!pip install snac"
]
},
{
"cell_type": "markdown",
@@ -195,6 +206,29 @@
"dataset = load_dataset(\"MrDragonFox/Elise\", split = \"train\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tokeniser_length = 128256\n",
"start_of_text = 128000\n",
"end_of_text = 128009\n",
"\n",
"start_of_speech = tokeniser_length + 1\n",
"end_of_speech = tokeniser_length + 2\n",
"\n",
"start_of_human = tokeniser_length + 3\n",
"end_of_human = tokeniser_length + 4\n",
"\n",
"start_of_ai = tokeniser_length + 5\n",
"end_of_ai = tokeniser_length + 6\n",
"pad_token = tokeniser_length + 7\n",
"\n",
"audio_tokens_start = tokeniser_length + 10"
]
},
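Note: this new cell moves the special-token definitions ahead of their first use. With the Llama-3 tokeniser vocabulary size of 128256, the named constants resolve to exactly the magic numbers removed later in this diff — a minimal sanity-check sketch, not part of the notebook:

```python
# Sanity check: the named constants reproduce the hardcoded IDs they replace.
tokeniser_length = 128256

assert tokeniser_length + 1 == 128257    # start_of_speech
assert tokeniser_length + 2 == 128258    # end_of_speech
assert tokeniser_length + 3 == 128259    # start_of_human
assert tokeniser_length + 4 == 128260    # end_of_human
assert tokeniser_length + 7 == 128263    # pad_token
assert tokeniser_length + 10 == 128266   # audio_tokens_start
```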
{
"cell_type": "code",
"execution_count": null,
@@ -383,13 +417,13 @@
"\n",
" all_codes = []\n",
" for i in range(codes[0].shape[1]):\n",
" all_codes.append(codes[0][0][i].item()+128266)\n",
" all_codes.append(codes[1][0][2*i].item()+128266+4096)\n",
" all_codes.append(codes[2][0][4*i].item()+128266+(2*4096))\n",
" all_codes.append(codes[2][0][(4*i)+1].item()+128266+(3*4096))\n",
" all_codes.append(codes[1][0][(2*i)+1].item()+128266+(4*4096))\n",
" all_codes.append(codes[2][0][(4*i)+2].item()+128266+(5*4096))\n",
" all_codes.append(codes[2][0][(4*i)+3].item()+128266+(6*4096))\n",
" all_codes.append(codes[0][0][i].item()+audio_tokens_start)\n",
" all_codes.append(codes[1][0][2*i].item()+audio_tokens_start+4096)\n",
" all_codes.append(codes[2][0][4*i].item()+audio_tokens_start+(2*4096))\n",
" all_codes.append(codes[2][0][(4*i)+1].item()+audio_tokens_start+(3*4096))\n",
" all_codes.append(codes[1][0][(2*i)+1].item()+audio_tokens_start+(4*4096))\n",
" all_codes.append(codes[2][0][(4*i)+2].item()+audio_tokens_start+(5*4096))\n",
" all_codes.append(codes[2][0][(4*i)+3].item()+audio_tokens_start+(6*4096))\n",
"\n",
"\n",
" return all_codes\n",
@@ -412,23 +446,6 @@
" return example\n",
"\n",
"dataset = dataset.map(add_codes, remove_columns=[\"audio\"])\n",
"\n",
"tokeniser_length = 128256\n",
"start_of_text = 128000\n",
"end_of_text = 128009\n",
"\n",
"start_of_speech = tokeniser_length + 1\n",
"end_of_speech = tokeniser_length + 2\n",
"\n",
"start_of_human = tokeniser_length + 3\n",
"end_of_human = tokeniser_length + 4\n",
"\n",
"start_of_ai = tokeniser_length + 5\n",
"end_of_ai = tokeniser_length + 6\n",
"pad_token = tokeniser_length + 7\n",
"\n",
"audio_tokens_start = tokeniser_length + 10\n",
"\n",
"dataset = dataset.filter(lambda x: x[\"codes_list\"] is not None)\n",
"dataset = dataset.filter(lambda x: len(x[\"codes_list\"]) > 0)\n",
"\n",
@@ -1946,8 +1963,8 @@
" input_ids = tokenizer(prompt, return_tensors=\"pt\").input_ids\n",
" all_input_ids.append(input_ids)\n",
"\n",
"start_token = torch.tensor([[ 128259]], dtype=torch.int64) # Start of human\n",
"end_tokens = torch.tensor([[128009, 128260]], dtype=torch.int64) # End of text, End of human\n",
"start_token = torch.tensor([[ start_of_human]], dtype=torch.int64) # Start of human\n",
"end_tokens = torch.tensor([[end_of_text, end_of_human]], dtype=torch.int64) # End of text, End of human\n",
"\n",
"all_modified_input_ids = []\n",
"for input_ids in all_input_ids:\n",
@@ -1959,7 +1976,7 @@
"max_length = max([modified_input_ids.shape[1] for modified_input_ids in all_modified_input_ids])\n",
"for modified_input_ids in all_modified_input_ids:\n",
" padding = max_length - modified_input_ids.shape[1]\n",
" padded_tensor = torch.cat([torch.full((1, padding), 128263, dtype=torch.int64), modified_input_ids], dim=1)\n",
" padded_tensor = torch.cat([torch.full((1, padding), pad_token, dtype=torch.int64), modified_input_ids], dim=1)\n",
" attention_mask = torch.cat([torch.zeros((1, padding), dtype=torch.int64), torch.ones((1, modified_input_ids.shape[1]), dtype=torch.int64)], dim=1)\n",
" all_padded_tensors.append(padded_tensor)\n",
" all_attention_masks.append(attention_mask)\n",
@@ -1978,11 +1995,11 @@
" top_p=0.95,\n",
" repetition_penalty=1.1,\n",
" num_return_sequences=1,\n",
" eos_token_id=128258,\n",
" eos_token_id=end_of_speech,\n",
" use_cache = True\n",
" )\n",
"token_to_find = 128257\n",
"token_to_remove = 128258\n",
"token_to_find = start_of_speech\n",
"token_to_remove = end_of_speech\n",
"\n",
"token_indices = (generated_ids == token_to_find).nonzero(as_tuple=True)\n",
"\n",
@@ -2006,7 +2023,7 @@
" row_length = row.size(0)\n",
" new_length = (row_length // 7) * 7\n",
" trimmed_row = row[:new_length]\n",
" trimmed_row = [t - 128266 for t in trimmed_row]\n",
" trimmed_row = [t - audio_tokens_start for t in trimmed_row]\n",
" code_lists.append(trimmed_row)\n",
"\n",
"\n",
@@ -2137,7 +2154,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 28/28 [00:01<00:00, 27.83it/s]\n"
"100%|██████████| 28/28 [00:01<00:00, 27.83it/s]\n"
]
},
{
@@ -2186,7 +2203,7 @@
" <a href=\"https://discord.gg/unsloth\"><img src=\"https://github.com/unslothai/unsloth/raw/main/images/Discord.png\" width=\"145\"></a>\n",
" <a href=\"https://docs.unsloth.ai/\"><img src=\"https://github.com/unslothai/unsloth/blob/main/images/documentation%20green%20button.png?raw=true\" width=\"125\"></a>\n",
"\n",
" Join Discord if you need help + \u2b50\ufe0f <i>Star us on <a href=\"https://github.com/unslothai/unsloth\">Github</a> </i> \u2b50\ufe0f\n",
" Join Discord if you need help + ⭐️ <i>Star us on <a href=\"https://github.com/unslothai/unsloth\">Github</a> </i> ⭐️\n",
"</div>\n"
]
}
@@ -2257,9 +2274,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_4145e0f0ee3e45eaa45706496c45ad92",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_de11a736d05e463fa25eb5c134a33a42",
"value": "Filter:\u2007100%"
"value": "Filter: 100%"
}
},
"09a90177afbf47dfa75d09e562f78ef1": {
@@ -2370,9 +2387,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_1104a11ea07348c09d9d6ea18a7a0c4f",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_c86528d68fb642c6b349e1f72309e9c2",
"value": "Filter:\u2007100%"
"value": "Filter: 100%"
}
},
"1104a11ea07348c09d9d6ea18a7a0c4f": {
@@ -2864,9 +2881,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_1c99dbc970074f9788d87847019699d4",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_75dd059f3d13436b9fa15a26c3d56b61",
"value": "\u20071195/1195\u2007[00:56&lt;00:00,\u200722.63\u2007examples/s]"
"value": " 1195/1195[00:56&lt;00:00, 22.63 examples/s]"
}
},
"54560d7f982a4af0b680c276cb74e8de": {
@@ -3011,9 +3028,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3deacc4ba8a540ef84bdf83d63c5446e",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_111b14c261a4468daa3f68b8dbc79629",
"value": "\u20071195/1195\u2007[00:00&lt;00:00,\u20072586.53\u2007examples/s]"
"value": " 1195/1195[00:00&lt;00:00, 2586.53 examples/s]"
}
},
"5c052294982d4da39950473ab7fd832f": {
@@ -3032,9 +3049,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_09a90177afbf47dfa75d09e562f78ef1",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_07fb4f7f1aab42f4a0d184ee2c5644ec",
"value": "\u20071195/1195\u2007[00:01&lt;00:00,\u2007988.84\u2007examples/s]"
"value": " 1195/1195[00:01&lt;00:00, 988.84 examples/s]"
}
},
"6a92469e5ff545e09ca55265ae57cb59": {
@@ -3210,9 +3227,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_54560d7f982a4af0b680c276cb74e8de",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_6a92469e5ff545e09ca55265ae57cb59",
"value": "\u20071195/1195\u2007[00:00&lt;00:00,\u20072636.42\u2007examples/s]"
"value": " 1195/1195[00:00&lt;00:00, 2636.42 examples/s]"
}
},
"8937c2575e9f48cb8498dc6554889a77": {
@@ -3387,9 +3404,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f04c54b9ac7e4af1b94276c109ed349e",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_bf9ea54f70f6427cbfed6e58a6463583",
"value": "Map:\u2007100%"
"value": "Map: 100%"
}
},
"a531278935a54a398db4f296ee88dca3": {
@@ -3424,9 +3441,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_6f85d840cc744c268da6e7470ffe8a58",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_1db03134e899468bab57cad43f33bb6f",
"value": "\u20071195/1195\u2007[00:01&lt;00:00,\u2007692.61\u2007examples/s]"
"value": " 1195/1195[00:01&lt;00:00, 692.61 examples/s]"
}
},
"a8cde83e4c224ff5a519cbe9e850426e": {
@@ -3784,9 +3801,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_eb257008691346d3a48ca7a694a6d9fe",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_6ac48a8dfb674c7090c8831841c3bd4d",
"value": "Map:\u2007100%"
"value": "Map: 100%"
}
},
"f5199871f0dd40c498abfb9ecec75484": {
@@ -3805,9 +3822,9 @@
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_5638ccc92d1342b2a63a3945b957a762",
"placeholder": "\u200b",
"placeholder": "",
"style": "IPY_MODEL_3d2aa872ce1a41fd81cdce2bd75a946d",
"value": "Map:\u2007100%"
"value": "Map: 100%"
}
},
"fe99495940c24e709e79d4af9991a2df": {
Expand Down Expand Up @@ -3942,4 +3959,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
}
}