Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
1fe4d04
added and tested: OLMo-1B,OLMo-7B
jonasrohw Dec 12, 2024
0f3e3b3
fixed: numpy do not do a major upgrade!
jonasrohw Dec 13, 2024
3a101f4
fixed: dimensions of 7b to be correct
jonasrohw Dec 13, 2024
1b34ccd
tested: Loading checkpoints & model variations
jonasrohw Dec 13, 2024
f0a0a68
Reimplement OLMoE changes.
joelburget Dec 14, 2024
8c094e5
Implement TODO (norm_topk_prob)
joelburget Dec 14, 2024
7565c06
Disable bos token for OLMoE.
joelburget Dec 14, 2024
04cd309
Add q and k norm.
joelburget Dec 15, 2024
68d6961
Correct normalization type for OLMoE.
joelburget Dec 15, 2024
9afd032
Merge pull request #1 from joelburget/olmoe
jonasrohw Dec 15, 2024
96c1fbb
Merge branch 'dev' into OLMo
jonasrohw Dec 15, 2024
72fb903
ran formatting
jonasrohw Dec 15, 2024
9d3a85e
Merge branch 'dev' into OLMo
bryce13950 Feb 4, 2025
d4519b2
Merge branch 'dev' into OLMo
bryce13950 Feb 5, 2025
064310f
tmp update for olmo2
Ja1Zhou Feb 1, 2025
b1fd04b
Fix: Olmo2 uses normalization after the attention/mlp
jonasrohw Feb 15, 2025
871ba03
Merge branch 'dev' into OLMo
bryce13950 Jun 16, 2025
7939e8d
ran format
bryce13950 Jun 16, 2025
97fd1e7
fixed some type issues
bryce13950 Jun 19, 2025
9032fe7
Merge branch 'dev' into OLMo
bryce13950 Jun 24, 2025
39703c4
OLMo 2 RMS
jleechung Jul 22, 2025
1c283c1
OLMo 2 RMS
jleechung Jul 22, 2025
688a421
Tested Instruct models
jleechung Jul 22, 2025
9febc5c
Merge pull request #3 from jleechung/OLMo
jonasrohw Jul 23, 2025
86b1fce
fix: Olmo2DecoderLayer type issues
taziksh Oct 11, 2025
fa5c885
fix type assertions for attention
taziksh Oct 11, 2025
148df46
chore: bump min Python to 3.10 for jaxtyping mypy plugin compatibility
taziksh Oct 12, 2025
1c60345
Merge dev and regenerate lock file
taziksh Oct 12, 2025
7aa3a91
fix: sort imports in olmo2.py
taziksh Oct 12, 2025
c8d443b
docs: update Colab notebook for OLMo models
taziksh Oct 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ docs/source/generated
# docs/source/_static/model_table
**.orig
.venv

170 changes: 141 additions & 29 deletions demos/Colab_Compatibility.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,15 @@
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"execution_count": 1,
"metadata": {
"execution": {
"iopub.execute_input": "2025-10-12T05:25:17.715327Z",
"iopub.status.busy": "2025-10-12T05:25:17.715010Z",
"iopub.status.idle": "2025-10-12T05:25:17.736164Z",
"shell.execute_reply": "2025-10-12T05:25:17.735908Z"
}
},
"outputs": [
{
"name": "stdout",
Expand All @@ -16,9 +23,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/m3/z6c6rcdj1rbb2jh9vqpgvxg40000gn/T/ipykernel_86391/3507779555.py:18: DeprecationWarning: `magic(...)` is deprecated since IPython 0.13 (warning added in 8.1), use run_line_magic(magic_name, parameter_s).\n",
"/var/folders/45/yfzw4rzj58974jvxcsdv72540000gn/T/ipykernel_77536/3507779555.py:18: DeprecationWarning: `magic(...)` is deprecated since IPython 0.13 (warning added in 8.1), use run_line_magic(magic_name, parameter_s).\n",
" ipython.magic(\"load_ext autoreload\")\n",
"/var/folders/m3/z6c6rcdj1rbb2jh9vqpgvxg40000gn/T/ipykernel_86391/3507779555.py:19: DeprecationWarning: `magic(...)` is deprecated since IPython 0.13 (warning added in 8.1), use run_line_magic(magic_name, parameter_s).\n",
"/var/folders/45/yfzw4rzj58974jvxcsdv72540000gn/T/ipykernel_77536/3507779555.py:19: DeprecationWarning: `magic(...)` is deprecated since IPython 0.13 (warning added in 8.1), use run_line_magic(magic_name, parameter_s).\n",
" ipython.magic(\"autoreload 2\")\n"
]
}
Expand Down Expand Up @@ -58,14 +65,21 @@
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2025-10-12T05:25:17.756090Z",
"iopub.status.busy": "2025-10-12T05:25:17.755974Z",
"iopub.status.idle": "2025-10-12T05:25:34.901309Z",
"shell.execute_reply": "2025-10-12T05:25:34.900982Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TransformerLens currently supports 216 models out of the box.\n"
"TransformerLens currently supports 237 models out of the box.\n"
]
}
],
Expand All @@ -88,8 +102,15 @@
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2025-10-12T05:25:34.902632Z",
"iopub.status.busy": "2025-10-12T05:25:34.902542Z",
"iopub.status.idle": "2025-10-12T05:25:34.926960Z",
"shell.execute_reply": "2025-10-12T05:25:34.926688Z"
}
},
"outputs": [],
"source": [
"def mark_models_as_tested(model_set: List[str]) -> None:\n",
Expand Down Expand Up @@ -198,8 +219,15 @@
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2025-10-12T05:25:34.928492Z",
"iopub.status.busy": "2025-10-12T05:25:34.928326Z",
"iopub.status.idle": "2025-10-12T05:25:34.947833Z",
"shell.execute_reply": "2025-10-12T05:25:34.947488Z"
}
},
"outputs": [],
"source": [
"# The following models can run in the T4 free environment\n",
Expand Down Expand Up @@ -324,8 +352,15 @@
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2025-10-12T05:25:34.949521Z",
"iopub.status.busy": "2025-10-12T05:25:34.949408Z",
"iopub.status.idle": "2025-10-12T05:25:34.965842Z",
"shell.execute_reply": "2025-10-12T05:25:34.965557Z"
}
},
"outputs": [],
"source": [
"paid_gpu_models = [\n",
Expand Down Expand Up @@ -395,8 +430,15 @@
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2025-10-12T05:25:34.967319Z",
"iopub.status.busy": "2025-10-12T05:25:34.967234Z",
"iopub.status.idle": "2025-10-12T05:25:34.983486Z",
"shell.execute_reply": "2025-10-12T05:25:34.982975Z"
}
},
"outputs": [],
"source": [
"paid_cpu_models = [\n",
Expand Down Expand Up @@ -428,8 +470,15 @@
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2025-10-12T05:25:34.985377Z",
"iopub.status.busy": "2025-10-12T05:25:34.985250Z",
"iopub.status.idle": "2025-10-12T05:25:35.001910Z",
"shell.execute_reply": "2025-10-12T05:25:35.001633Z"
}
},
"outputs": [],
"source": [
"incompatible_models = [\n",
Expand Down Expand Up @@ -460,8 +509,15 @@
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"execution_count": 8,
"metadata": {
"execution": {
"iopub.execute_input": "2025-10-12T05:25:35.003433Z",
"iopub.status.busy": "2025-10-12T05:25:35.003310Z",
"iopub.status.idle": "2025-10-12T05:25:35.019609Z",
"shell.execute_reply": "2025-10-12T05:25:35.019193Z"
}
},
"outputs": [],
"source": [
"# The following models take a few extra steps to function. Check the official demo for more\n",
Expand All @@ -482,8 +538,15 @@
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"execution_count": 9,
"metadata": {
"execution": {
"iopub.execute_input": "2025-10-12T05:25:35.021077Z",
"iopub.status.busy": "2025-10-12T05:25:35.020962Z",
"iopub.status.idle": "2025-10-12T05:25:35.036567Z",
"shell.execute_reply": "2025-10-12T05:25:35.036247Z"
}
},
"outputs": [],
"source": [
"# These all work on the free version of Colab\n",
Expand All @@ -500,8 +563,15 @@
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"execution_count": 10,
"metadata": {
"execution": {
"iopub.execute_input": "2025-10-12T05:25:35.037854Z",
"iopub.status.busy": "2025-10-12T05:25:35.037772Z",
"iopub.status.idle": "2025-10-12T05:25:35.052311Z",
"shell.execute_reply": "2025-10-12T05:25:35.052040Z"
}
},
"outputs": [],
"source": [
"# This model works on the free version of Colab\n",
Expand All @@ -520,8 +590,15 @@
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"execution_count": 11,
"metadata": {
"execution": {
"iopub.execute_input": "2025-10-12T05:25:35.053702Z",
"iopub.status.busy": "2025-10-12T05:25:35.053626Z",
"iopub.status.idle": "2025-10-12T05:25:35.067316Z",
"shell.execute_reply": "2025-10-12T05:25:35.067062Z"
}
},
"outputs": [],
"source": [
"broken_models = [\n",
Expand All @@ -531,14 +608,49 @@
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"execution_count": 12,
"metadata": {
"execution": {
"iopub.execute_input": "2025-10-12T05:25:35.068598Z",
"iopub.status.busy": "2025-10-12T05:25:35.068519Z",
"iopub.status.idle": "2025-10-12T05:25:35.083572Z",
"shell.execute_reply": "2025-10-12T05:25:35.083375Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Baidicoot/Othello-GPT-Transformer-Lens\n"
"Baidicoot/Othello-GPT-Transformer-Lens"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"allenai/OLMo-1B-hf\n",
"allenai/OLMo-7B-hf\n",
"allenai/OLMo-7B-0724-hf\n",
"allenai/OLMo-7B-0724-SFT-hf\n",
"allenai/OLMo-7B-0724-Instruct-hf\n",
"allenai/OLMo-7B-0424-hf\n",
"allenai/OLMo-7B-Twin-2T-hf\n",
"allenai/OLMo-1B-0724-hf\n",
"allenai/OLMo-7B-Instruct-hf\n",
"allenai/OLMo-7B-SFT-hf\n",
"allenai/OLMoE-1B-7B-0924\n",
"allenai/OLMoE-1B-7B-0924-SFT\n",
"allenai/OLMoE-1B-7B-0924-Instruct\n",
"allenai/OLMo-2-0425-1B\n",
"allenai/OLMo-2-0425-1B-SFT\n",
"allenai/OLMo-2-0425-1B-DPO\n",
"allenai/OLMo-2-0425-1B-Instruct\n",
"allenai/OLMo-2-1124-7B\n",
"allenai/OLMo-2-1124-7B-SFT\n",
"allenai/OLMo-2-1124-7B-DPO\n",
"allenai/OLMo-2-1124-7B-Instruct\n"
]
}
],
Expand Down Expand Up @@ -566,7 +678,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
"version": "3.12.9"
}
},
"nbformat": 4,
Expand Down
Loading