diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index 9a802831e..886499c40 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -233,8 +233,8 @@ jobs:
           - "Exploratory_Analysis_Demo"
           # - "Grokking_Demo"
           # - "Head_Detector_Demo"
-          # - "Interactive_Neuroscope"
-          # - "LLaMA"
+          - "Interactive_Neuroscope"
+          - "LLaMA"
           # - "LLaMA2_GPU_Quantized"
           - "Main_Demo"
           # - "No_Position_Experiment"
diff --git a/demos/LLaMA.ipynb b/demos/LLaMA.ipynb
index 8b0e09ee4..72779e600 100644
--- a/demos/LLaMA.ipynb
+++ b/demos/LLaMA.ipynb
@@ -92,7 +92,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -110,7 +110,7 @@
     "from transformer_lens.hook_points import (\n",
     "    HookPoint,\n",
     ") # Hooking utilities\n",
-    "from transformer_lens import HookedTransformer\n",
+    "from transformer_lens.model_bridge import TransformerBridge\n",
     "\n",
     "torch.set_grad_enabled(False)\n",
     "\n",
@@ -164,7 +164,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -174,7 +174,7 @@
     "    tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH)\n",
     "    hf_model = LlamaForCausalLM.from_pretrained(MODEL_PATH, low_cpu_mem_usage=True)\n",
     "\n",
-    "    model = HookedTransformer.from_pretrained(\n",
+    "    model = TransformerBridge.boot_transformers(\n",
     "        \"llama-7b\",\n",
     "        hf_model=hf_model,\n",
     "        device=\"cpu\",\n",
@@ -183,6 +183,7 @@
     "        center_unembed=False,\n",
     "        tokenizer=tokenizer,\n",
     "    )\n",
+    "    model.enable_compatibility_mode()\n",
     "\n",
     "    model = model.to(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
     "    model.generate(\"The capital of Germany is\", max_new_tokens=20, temperature=0)"
@@ -204,7 +205,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -274,7 +275,8 @@
    "tokenizer = LlamaTokenizer.from_pretrained(LLAMA_2_7B_CHAT_PATH)\n",
    "hf_model = LlamaForCausalLM.from_pretrained(LLAMA_2_7B_CHAT_PATH, low_cpu_mem_usage=True)\n",
    "\n",
-   "model = HookedTransformer.from_pretrained(LLAMA_2_7B_CHAT_PATH, device=\"cpu\", fold_ln=False, center_writing_weights=False, center_unembed=False)\n",
+   "model = TransformerBridge.boot_transformers(LLAMA_2_7B_CHAT_PATH, device=\"cpu\", fold_ln=False, center_writing_weights=False, center_unembed=False)\n",
+   "model.enable_compatibility_mode()\n",
    "\n",
    "model = model.to(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "model.generate(\"The capital of Germany is\", max_new_tokens=20, temperature=0)"
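For reference, this is the migration pattern both updated cells apply, extracted from the notebook JSON into a plain-Python sketch. `MODEL_PATH` is a hypothetical placeholder for a local LLaMA-7B checkpoint, and the comments describe how `enable_compatibility_mode()` is used in this diff rather than documented API guarantees:

```python
# Sketch of the loading pattern the updated notebook cells use.
# Assumes a transformer_lens version that ships TransformerBridge, and
# MODEL_PATH (hypothetical placeholder) pointing at local LLaMA-7B weights.
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

from transformer_lens.model_bridge import TransformerBridge

MODEL_PATH = "/path/to/llama-7b"  # hypothetical; set to your checkpoint

torch.set_grad_enabled(False)

tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH)
hf_model = LlamaForCausalLM.from_pretrained(MODEL_PATH, low_cpu_mem_usage=True)

# boot_transformers wraps the already-loaded Hugging Face model in a
# TransformerBridge, replacing the old HookedTransformer.from_pretrained call.
model = TransformerBridge.boot_transformers(
    "llama-7b",
    hf_model=hf_model,
    device="cpu",
    fold_ln=False,
    center_writing_weights=False,
    center_unembed=False,
    tokenizer=tokenizer,
)
# Compatibility mode is enabled before moving the model to GPU, as in the
# notebook, so downstream HookedTransformer-style code keeps working.
model.enable_compatibility_mode()

model = model.to("cuda" if torch.cuda.is_available() else "cpu")
model.generate("The capital of Germany is", max_new_tokens=20, temperature=0)
```

The same two-line change (`boot_transformers(...)` plus `model.enable_compatibility_mode()`) covers the one-liner Llama-2-chat cell in the final hunk; the surrounding `.to(device)` and `.generate(...)` calls are unchanged, which is why re-enabling the `Interactive_Neuroscope` and `LLaMA` notebooks in `checks.yml` is the only CI change needed.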