Skip to content

Commit 4e871bb

Browse files
authored
Merge pull request #28 from github/switch-to-github-models-api
Switch to GitHub models API for embeddings requests
2 parents 273874f + 393fcee commit 4e871bb

File tree

10 files changed

+28
-24
lines changed

10 files changed

+28
-24
lines changed

.env-sample

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
# get your pat token from: https://github.com/settings/tokens?type=beta
2+
# if creating a new token, ensure it has `models: read` permissions
23
GITHUB_TOKEN="github_pat_****"

cookbooks/python/llamaindex/rag_getting_started.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
" raise ValueError(\"GITHUB_TOKEN is not set\")\n",
6161
"\n",
6262
"os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"GITHUB_TOKEN\")\n",
63-
"os.environ[\"OPENAI_BASE_URL\"] = \"https://models.inference.ai.azure.com/\""
63+
"os.environ[\"OPENAI_BASE_URL\"] = \"https://models.github.ai/inference\""
6464
]
6565
},
6666
{

samples/js/azure_ai_inference/embeddings.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { isUnexpected } from "@azure-rest/ai-inference";
33
import { AzureKeyCredential } from "@azure/core-auth";
44

55
const token = process.env["GITHUB_TOKEN"];
6-
const endpoint = "https://models.inference.ai.azure.com";
6+
const endpoint = "https://models.github.ai/inference";
77

88
/* By using the Azure AI Inference SDK, you can easily experiment with different models
99
by modifying the value of `modelName` in the code below. For this code sample
@@ -32,9 +32,9 @@ export async function main() {
3232
for (const item of response.body.data) {
3333
let length = item.embedding.length;
3434
console.log(
35-
`data[${item.index}]: length=${length}, ` +
36-
`[${item.embedding[0]}, ${item.embedding[1]}, ` +
37-
`..., ${item.embedding[length - 2]}, ${item.embedding[length -1]}]`);
35+
`data[${item.index}]: length=${length}, ` +
36+
`[${item.embedding[0]}, ${item.embedding[1]}, ` +
37+
`..., ${item.embedding[length - 2]}, ${item.embedding[length - 1]}]`);
3838
}
3939
console.log(response.body.usage);
4040
}

samples/js/openai/embeddings.js

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import OpenAI from "openai";
22

33
const token = process.env["GITHUB_TOKEN"];
4-
const endpoint = "https://models.inference.ai.azure.com";
4+
const endpoint = "https://models.github.ai/inference";
55

66
/* Pick one of the OpenAI embeddings models from the GitHub Models service */
77
const modelName = "text-embedding-3-small";
@@ -11,16 +11,16 @@ export async function main() {
1111
const client = new OpenAI({ baseURL: endpoint, apiKey: token });
1212

1313
const response = await client.embeddings.create({
14-
input: ["first phrase", "second phrase", "third phrase"],
15-
model: modelName
14+
input: ["first phrase", "second phrase", "third phrase"],
15+
model: modelName
1616
});
1717

1818
for (const item of response.data) {
19-
let length = item.embedding.length;
20-
console.log(
21-
`data[${item.index}]: length=${length}, ` +
22-
`[${item.embedding[0]}, ${item.embedding[1]}, ` +
23-
`..., ${item.embedding[length - 2]}, ${item.embedding[length -1]}]`);
19+
let length = item.embedding.length;
20+
console.log(
21+
`data[${item.index}]: length=${length}, ` +
22+
`[${item.embedding[0]}, ${item.embedding[1]}, ` +
23+
`..., ${item.embedding[length - 2]}, ${item.embedding[length - 1]}]`);
2424
}
2525
console.log(response.usage);
2626
}

samples/python/azure_ai_inference/embeddings.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
from azure.core.credentials import AzureKeyCredential
55

66
token = os.environ["GITHUB_TOKEN"]
7-
endpoint = "https://models.inference.ai.azure.com"
7+
endpoint = "https://models.github.ai/inference"
88

99
# By using the Azure AI Inference SDK, you can easily experiment with different models
1010
# by modifying the value of `modelName` in the code below. For this code sample
1111
# you need an embedding model. The following embedding models are
1212
# available in the GitHub Models service:
13-
#
13+
#
1414
# Cohere: Cohere-embed-v3-english, Cohere-embed-v3-multilingual
1515
# Azure OpenAI: text-embedding-3-small, text-embedding-3-large
1616
model_name = "text-embedding-3-small"

samples/python/azure_ai_inference/getting_started.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
" raise ValueError(\"GITHUB_TOKEN is not set\")\n",
5959
"\n",
6060
"github_token = os.environ[\"GITHUB_TOKEN\"]\n",
61-
"endpoint = \"https://models.inference.ai.azure.com\"\n",
61+
"endpoint = \"https://models.github.ai/inference\"\n",
6262
"\n",
6363
"\n",
6464
"# Create a client\n",
@@ -117,7 +117,7 @@
117117
" # Optional parameters\n",
118118
" temperature=1.,\n",
119119
" max_tokens=1000,\n",
120-
" top_p=1. \n",
120+
" top_p=1.\n",
121121
")\n",
122122
"\n",
123123
"print(response.choices[0].message.content)"

samples/python/mistralai/getting_started.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
" raise ValueError(\"GITHUB_TOKEN is not set\")\n",
5959
"\n",
6060
"github_token = os.environ[\"GITHUB_TOKEN\"]\n",
61-
"endpoint = \"https://models.inference.ai.azure.com\"\n",
61+
"endpoint = \"https://models.github.ai/inference\"\n",
6262
"\n",
6363
"# Pick one of the Mistral models from the GitHub Models service\n",
6464
"model_name = \"Mistral-large\"\n",
@@ -99,7 +99,7 @@
9999
" # Optional parameters\n",
100100
" temperature=1.,\n",
101101
" max_tokens=1000,\n",
102-
" top_p=1. \n",
102+
" top_p=1.\n",
103103
")\n",
104104
"\n",
105105
"print(response.choices[0].message.content)"

samples/python/openai/embeddings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from openai import OpenAI
33

44
token = os.environ["GITHUB_TOKEN"]
5-
endpoint = "https://models.inference.ai.azure.com"
5+
endpoint = "https://models.github.ai/inference"
66

77
# Pick one of the OpenAI embeddings models from the GitHub Models service
88
model_name = "text-embedding-3-small"

samples/python/openai/embeddings_getting_started.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
" raise ValueError(\"GITHUB_TOKEN is not set\")\n",
5656
"\n",
5757
"os.environ[\"OPENAI_API_KEY\"] = os.getenv(\"GITHUB_TOKEN\")\n",
58-
"os.environ[\"OPENAI_BASE_URL\"] = \"https://models.inference.ai.azure.com/\"\n",
58+
"os.environ[\"OPENAI_BASE_URL\"] = \"https://models.github.ai/inference\"\n",
5959
"\n",
6060
"client = OpenAI()\n"
6161
]
@@ -77,7 +77,7 @@
7777
"metadata": {},
7878
"outputs": [],
7979
"source": [
80-
"model_name = \"text-embedding-3-small\" \n",
80+
"model_name = \"text-embedding-3-small\"\n",
8181
"\n",
8282
"response = client.embeddings.create(\n",
8383
" model=model_name,\n",
@@ -105,7 +105,7 @@
105105
"metadata": {},
106106
"outputs": [],
107107
"source": [
108-
"model_name = \"text-embedding-3-small\" \n",
108+
"model_name = \"text-embedding-3-small\"\n",
109109
"inputs = [\"Hello, world!\", \"How are you?\", \"What's the weather like?\"]\n",
110110
"\n",
111111
"response = client.embeddings.create(\n",

samples/python/openai/multi_turn.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
client = OpenAI(
1717
base_url=endpoint,
1818
api_key=token,
19+
default_headers={
20+
"x-ms-useragent": "github-models-sample",
21+
}
1922
)
2023

2124
# Call the chat completion API
@@ -42,4 +45,4 @@
4245
)
4346

4447
# Print the response
45-
print(response.choices[0].message.content)
48+
print(response.choices[0].message.content)

0 commit comments

Comments (0)