From 273684ba93be9ec36d8b20ec45cde42ec39554a9 Mon Sep 17 00:00:00 2001 From: arthurgousset <46296830+arthurgousset@users.noreply.github.com> Date: Sat, 17 May 2025 15:48:53 -0700 Subject: [PATCH 1/2] test(azure_openai_repro): example script to reproduce the bug Can't reproduce the bug with this script, seems to be resolved in the client already unless I'm missing something. --- azure_openai_repro.py | 69 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 azure_openai_repro.py diff --git a/azure_openai_repro.py b/azure_openai_repro.py new file mode 100644 index 00000000000..fac165d0863 --- /dev/null +++ b/azure_openai_repro.py @@ -0,0 +1,69 @@ +""" +Set the API key when running this script +```sh +$ AZURE_OPENAI_API_KEY="azure-openai-api-key" python azure_openai_repro.py +``` + +Set `OPENAI_LOG=debug` to see more verbose debug logs + +```sh +$ OPENAI_LOG=debug AZURE_OPENAI_API_KEY="azure-openai-api-key" python azure_openai_repro.py +``` +""" + +import chromadb +import os +from openai import AzureOpenAI +from chromadb.utils.embedding_functions.openai_embedding_function import OpenAIEmbeddingFunction + +""" +Constants +""" +# Source: https://portal.azure.com +AZURE_ENDPOINT = "https://chroma-repro-bug.openai.azure.com/" + +# Source: https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/#overview +AZURE_MODEL_NAME = "text-embedding-3-small" + +# Source: https://portal.azure.com +DEPLOYMENT = "text-embedding-3-small" + +# Source: https://portal.azure.com +API_VERSION = "2024-02-01" + +# Source: https://github.com/openai/openai-python?tab=readme-ov-file#microsoft-azure-openai +AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY") +if not AZURE_OPENAI_API_KEY: + raise ValueError("AZURE_OPENAI_API_KEY environment variable is not set") + +# Creates a custom embedding function that uses our Azure OpenAI deployment +my_azure_openai_embedding_function = OpenAIEmbeddingFunction( + api_type="azure", + deployment_id=DEPLOYMENT, + api_version=API_VERSION, 
+ api_base=AZURE_ENDPOINT, + model_name=AZURE_MODEL_NAME, + api_key=AZURE_OPENAI_API_KEY, +) + +# Creates a Chroma client +client = chromadb.Client() + +# Creates a Chroma collection using our custom embedding function +collection = client.create_collection("my_collection", embedding_function=my_azure_openai_embedding_function) + +# Embeds some example text +test = collection.add( + ids=["cat_embedding", "apple_embedding", "san_francisco_embedding"], + documents=["Cat", "Apple", "San Francisco"], +) + +# Queries our collection for the nearest neighbors to the example text +nearest_neighbors = collection.query( + query_texts=["Dog"], + n_results=1, +) + +# Prints the nearest neighbors +print(nearest_neighbors) \ No newline at end of file From caf86a492994574ac1cd105575022d85ed738fa4 Mon Sep 17 00:00:00 2001 From: arthurgousset <46296830+arthurgousset@users.noreply.github.com> Date: Sat, 17 May 2025 15:53:02 -0700 Subject: [PATCH 2/2] chore(azure_openai_repro): delete unused openai import --- azure_openai_repro.py | 1 - 1 file changed, 1 deletion(-) diff --git a/azure_openai_repro.py b/azure_openai_repro.py index fac165d0863..4a9ea04b95e 100644 --- a/azure_openai_repro.py +++ b/azure_openai_repro.py @@ -14,7 +14,6 @@ import chromadb import os -from openai import AzureOpenAI from chromadb.utils.embedding_functions.openai_embedding_function import OpenAIEmbeddingFunction """