diff --git a/ingest-doc-blob-document-intelligence-cosmos/default/manifest.json b/ingest-doc-blob-document-intelligence-cosmos/default/manifest.json index ddd06b7..239c27c 100644 --- a/ingest-doc-blob-document-intelligence-cosmos/default/manifest.json +++ b/ingest-doc-blob-document-intelligence-cosmos/default/manifest.json @@ -1,16 +1,16 @@ { "id": "default", - "title": "Document ingestion from Azure Blob Storage using Azure Document Intelligence OCR into Azure Cosmos DB for RAG", - "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents and images, using OCR powered by Azure Document Intelligence, from Azure Blob Storage, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL.", + "title": "Document ingestion from Azure Blob Storage using Azure Document Intelligence OCR into Azure Cosmos DB", + "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents and images, using OCR powered by Azure Document Intelligence, from Azure Blob Storage, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL. Please send any feedback or questions to CDB4AI@Microsoft.com.", "description": "", - "prerequisites": "Before you use this template, you need to configure an Azure Open AI instance with an embedding model. For more detailed prerequisites, see the [Azure Logic Apps project sample on GitHub - Create a Chat with Your Data](https://github.com/Azure/logicapps/tree/master/LogicApps-AI-RAG-Demo](https://github.com/Azure/logicapps/tree/master/LogicApps-AI-RAG-Demo). You need to [create an Azure Document Intelligence resource](https://learn.microsoft.com/azure/ai-services/document-intelligence/overview). 
You also need to [create an Azure Cosmos DB database](https://learn.microsoft.com/azure/cosmos-db/nosql/quickstart-dotnet), and create a connection by using the keys and endpoint. Also, get more familiar with [Vector Search in Azure Cosmos DB](https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search).", + "prerequisites": "Before you use this template, you need to configure an Azure Open AI instance with an embedding model. For more detailed prerequisites, see the [Azure Logic Apps project sample on GitHub - Create a Chat with Your Data](https://github.com/Azure/logicapps/tree/master/LogicApps-AI-RAG-Demo). You need to [create an Azure Document Intelligence resource](https://learn.microsoft.com/azure/ai-services/document-intelligence/overview). You also need to [create an Azure Cosmos DB database](https://learn.microsoft.com/azure/cosmos-db/nosql/quickstart-dotnet), and create a connection by using the keys and endpoint. Also, get more familiar with [Vector Search in Azure Cosmos DB](https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search).", "kinds": [ "stateful", "stateless" ], "workflows": { "default": { - "name": "ingest-doc-blob-document-intelligence-cosmos" + "name": "cdb-doc-indexer-blob" } }, "artifacts": [ @@ -25,8 +25,8 @@ }, "parameters": [ { - "name": "OpenAI_TextEmbedding_Deployment_#workflowname#", - "displayName": "OpenAI text embedding deployment identifier", + "name": "AOAI_Emb_Dep_#workflowname#", + "displayName": "OpenAI embedding deployment", "type": "String", "description": "Provide the deployment model for Azure OpenAI.", "required": true @@ -40,35 +40,35 @@ }, { "name": "CosmosDB_Account_#workflowname#", - "displayName": "Cosmos DB Account", + "displayName": "Azure Cosmos DB account", "type": "String", "description": "Provide the name for the Azure Cosmos DB Account.", "required": true }, { "name": "CosmosDB_Database_#workflowname#", - "displayName": "Cosmos DB name", + "displayName": "Azure Cosmos DB database", "type": 
"String", "description": "Provide the name for the Azure Cosmos DB database.", "required": true }, { "name": "CosmosDB_Collection_#workflowname#", - "displayName": "Cosmos DB collection name", + "displayName": "Azure Cosmos DB collection", "type": "String", "description": "Provide the name for the Azure Cosmos DB collection.", "required": true }, { - "name": "CosmosDB_Vector_Path_#workflowname#", - "displayName": "Vector embeddings path or property", + "name": "CDB_Vector_#workflowname#", + "displayName": "Vector embeddings property", "type": "String", "description": "Provide the path or property for the vector embeddings in the Cosmos DB data items.", "required": true }, { - "name": "CosmosDB_Text_Path_#workflowname#", - "displayName": "Property with the document text in data items", + "name": "CDB_Text_#workflowname#", + "displayName": "Document text property", "type": "String", "description": "Provide the property that contains the document text in the Cosmos DB data items.", "required": true diff --git a/ingest-doc-blob-document-intelligence-cosmos/default/workflow.json b/ingest-doc-blob-document-intelligence-cosmos/default/workflow.json index c415500..dbcacba 100644 --- a/ingest-doc-blob-document-intelligence-cosmos/default/workflow.json +++ b/ingest-doc-blob-document-intelligence-cosmos/default/workflow.json @@ -6,7 +6,7 @@ "type": "ServiceProvider", "inputs": { "parameters": { - "deploymentId": "@parameters('OpenAI_TextEmbedding_Deployment_#workflowname#')", + "deploymentId": "@parameters('AOAI_Emb_Dep_#workflowname#')", "input": "@take(body('Chunk_text')?['value'], length(body('Chunk_text')['value']))" }, "serviceProviderConfiguration": { @@ -31,9 +31,9 @@ "inputs": { "from": "@range(0, length(body('Chunk_text')['value']))\r\n", "select": { - "@{parameters('CosmosDB_Text_Path_#workflowname#')}": "@{body('Chunk_text')['value'][item()]}", + "@{parameters('CDB_Text_#workflowname#')}": "@{body('Chunk_text')['value'][item()]}", "documentName": 
"@{triggerBody()?['name']}", - "@{parameters('CosmosDB_Vector_Path_#workflowname#')}": "@body('Get_multiple_embeddings')['embeddings'][item()]", + "@{parameters('CDB_Vector_#workflowname#')}": "@body('Get_multiple_embeddings')['embeddings'][item()]", "fileType": "@{triggerBody()?['properties']?['blobFullPathWithContainer']}", "id": "@{guid()}", "chunkNumber": "@{add(item(),1)}", diff --git a/ingest-doc-blob-document-intelligence-cosmos/manifest.json b/ingest-doc-blob-document-intelligence-cosmos/manifest.json index 4c3d0b7..a1826c2 100644 --- a/ingest-doc-blob-document-intelligence-cosmos/manifest.json +++ b/ingest-doc-blob-document-intelligence-cosmos/manifest.json @@ -1,13 +1,13 @@ { "id": "ingest-doc-blob-document-intelligence-cosmos", - "title": "Document ingestion from Azure Blob Storage using Azure Document Intelligence OCR into Azure Cosmos DB for RAG", - "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents and images, using OCR powered by Azure Document Intelligence, from Azure Blob Storage, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL.", + "title": "Document ingestion from Azure Blob Storage using Azure Document Intelligence OCR into Azure Cosmos DB", + "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents and images, using OCR powered by Azure Document Intelligence, from Azure Blob Storage, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL. 
Please send any feedback or questions to CDB4AI@Microsoft.com.", "skus": [ "standard" ], "workflows": { "default": { - "name": "ingest-doc-blob-document-intelligence-cosmos" + "name": "cdb-doc-indexer-blob" } }, "featuredConnectors": [ diff --git a/ingest-doc-sharepoint-document-intelligence-cosmos/default/manifest.json b/ingest-doc-sharepoint-document-intelligence-cosmos/default/manifest.json index 4c06b67..1193b84 100644 --- a/ingest-doc-sharepoint-document-intelligence-cosmos/default/manifest.json +++ b/ingest-doc-sharepoint-document-intelligence-cosmos/default/manifest.json @@ -1,16 +1,16 @@ { "id": "default", - "title": "Document ingestion from SharePoint using Azure Document Intelligence OCR into Azure Cosmos DB for RAG", - "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents and images, using OCR powered by Azure Document Intelligence, from Microsoft SharePoint, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL.", + "title": "Document ingestion from SharePoint using Azure Document Intelligence OCR into Azure Cosmos DB", + "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents and images, using OCR powered by Azure Document Intelligence, from Microsoft SharePoint, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL. Please send any feedback or questions to CDB4AI@Microsoft.com.", "description": "", - "prerequisites": "Before you use this template, you need to configure an Azure Open AI instance with an embedding model. 
For more detailed prerequisites, see the [Azure Logic Apps project sample on GitHub - Create a Chat with Your Data](https://github.com/Azure/logicapps/tree/master/LogicApps-AI-RAG-Demo](https://github.com/Azure/logicapps/tree/master/LogicApps-AI-RAG-Demo). You need to [create an Azure Document Intelligence resource](https://learn.microsoft.com/azure/ai-services/document-intelligence/overview). You also need to [create an Azure Cosmos DB database](https://learn.microsoft.com/azure/cosmos-db/nosql/quickstart-dotnet), and create a connection by using the keys and endpoint. Also, get more familiar with [Vector Search in Azure Cosmos DB](https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search).", + "prerequisites": "Before you use this template, you need to configure an Azure Open AI instance with an embedding model. For more detailed prerequisites, see the [Azure Logic Apps project sample on GitHub - Create a Chat with Your Data](https://github.com/Azure/logicapps/tree/master/LogicApps-AI-RAG-Demo). You need to [create an Azure Document Intelligence resource](https://learn.microsoft.com/azure/ai-services/document-intelligence/overview). You also need to [create an Azure Cosmos DB database](https://learn.microsoft.com/azure/cosmos-db/nosql/quickstart-dotnet), and create a connection by using the keys and endpoint. 
Also, get more familiar with [Vector Search in Azure Cosmos DB](https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search).", "kinds": [ "stateful", "stateless" ], "workflows": { "default": { - "name": "ingest-doc-sharepoint-document-intelligence-cosmos" + "name": "cdb-doc-indexer-sp" } }, "artifacts": [ @@ -25,22 +25,22 @@ }, "parameters": [ { - "name": "OpenAI_TextEmbedding_Deployment_#workflowname#", - "displayName": "OpenAI text embedding deployment identifier", + "name": "AOAI_Emb_Dep_#workflowname#", + "displayName": "OpenAI embedding deployment", "type": "String", "description": "Provide the deployment model for Azure OpenAI.", "required": true }, { "name": "Sharepoint_Library_Name_#workflowname#", - "displayName": "SharePoint Library Name", + "displayName": "SharePoint Library name", "type": "String", "description": "Provide the name of the SharePoint Library.", "required": true }, { "name": "Sharepoint_Site_Address_#workflowname#", - "displayName": "SharePoint Site address", + "displayName": "SharePoint site address", "type": "String", "description": "Provide the address of the SharePoint Site.", "required": true @@ -53,35 +53,35 @@ "required": true }, { "name": "CosmosDB_Account_#workflowname#", - "displayName": "Cosmos DB Account", + "displayName": "Azure Cosmos DB account", "type": "String", "description": "Provide the name for the Azure Cosmos DB Account.", "required": true }, { "name": "CosmosDB_Database_#workflowname#", - "displayName": "Azure Cosmos DB name", + "displayName": "Azure Cosmos DB database", "type": "String", "description": "Provide the name for the Azure Cosmos DB database.", "required": true }, { "name": "CosmosDB_Collection_#workflowname#", - "displayName": "Cosmos DB collection name", + "displayName": "Azure Cosmos DB collection", "type": "String", "description": "Provide the name for the Azure Cosmos DB collection.", "required": true }, { - "name": "CosmosDB_Vector_Path_#workflowname#", - "displayName": "Vector embeddings path 
or property", + "name": "CDB_Vector_#workflowname#", + "displayName": "Vector embeddings property", "type": "String", "description": "Provide the path or property for the vector embeddings in the Cosmos DB data items.", "required": true }, { - "name": "CosmosDB_Text_Path_#workflowname#", - "displayName": "Property with the document text in data items", + "name": "CDB_Text_#workflowname#", + "displayName": "Document text property", "type": "String", "description": "Provide the property that contains the document text in the Cosmos DB data items.", "required": true diff --git a/ingest-doc-sharepoint-document-intelligence-cosmos/default/workflow.json b/ingest-doc-sharepoint-document-intelligence-cosmos/default/workflow.json index ba7e2c8..47bdef4 100644 --- a/ingest-doc-sharepoint-document-intelligence-cosmos/default/workflow.json +++ b/ingest-doc-sharepoint-document-intelligence-cosmos/default/workflow.json @@ -5,7 +5,7 @@ "type": "ServiceProvider", "inputs": { "parameters": { - "deploymentId": "@parameters('OpenAI_TextEmbedding_Deployment_#workflowname#')", + "deploymentId": "@parameters('AOAI_Emb_Dep_#workflowname#')", "input": "@take(body('Chunk_text')?['value'], length(body('Chunk_text')['value']))" }, "serviceProviderConfiguration": { @@ -30,9 +30,9 @@ "inputs": { "from": "@range(0, length(body('Chunk_text')['value']))\r\n", "select": { - "@{parameters('CosmosDB_Text_Path_#workflowname#')}": "@{body('Chunk_text')['value'][item()]}", + "@{parameters('CDB_Text_#workflowname#')}": "@{body('Chunk_text')['value'][item()]}", "documentName": "@{triggerBody()?['{FilenameWithExtension}']}", - "@{parameters('CosmosDB_Vector_Path_#workflowname#')}": "@body('Get_multiple_embeddings')['embeddings'][item()]", + "@{parameters('CDB_Vector_#workflowname#')}": "@body('Get_multiple_embeddings')['embeddings'][item()]", "fileLink": "@{triggerBody()?['{Link}']}", "id": "@{guid()}", "chunkNumber": "@{add(item(),1)}", diff --git 
a/ingest-doc-sharepoint-document-intelligence-cosmos/manifest.json b/ingest-doc-sharepoint-document-intelligence-cosmos/manifest.json index 34ec843..52cb042 100644 --- a/ingest-doc-sharepoint-document-intelligence-cosmos/manifest.json +++ b/ingest-doc-sharepoint-document-intelligence-cosmos/manifest.json @@ -1,13 +1,13 @@ { "id": "ingest-doc-sharepoint-document-intelligence-cosmos", - "title": "Document ingestion from SharePoint using Azure Document Intelligence OCR into Azure Cosmos DB for RAG", - "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents and images, using OCR powered by Azure Document Intelligence, from Microsoft SharePoint, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL.", + "title": "Document ingestion from SharePoint using Azure Document Intelligence OCR into Azure Cosmos DB", + "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents and images, using OCR powered by Azure Document Intelligence, from Microsoft SharePoint, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL. 
Please send any feedback or questions to CDB4AI@Microsoft.com.", "skus": [ "standard" ], "workflows": { "default": { - "name": "ingest-doc-sharepoint-document-intelligence-cosmos" + "name": "cdb-doc-indexer-sp" } }, "featuredConnectors": [ diff --git a/ingest-document-blob-openai-cosmos/default/manifest.json b/ingest-document-blob-openai-cosmos/default/manifest.json index e72b1d6..6348ee5 100644 --- a/ingest-document-blob-openai-cosmos/default/manifest.json +++ b/ingest-document-blob-openai-cosmos/default/manifest.json @@ -1,16 +1,16 @@ { "id": "default", - "title": "Document ingestion from Azure Blob Storage into Azure Cosmos DB for RAG", - "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents, such as PDFs and Markdown, from Azure Blob Storage, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL.", + "title": "Document ingestion from Azure Blob Storage into Azure Cosmos DB", + "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents, such as PDFs and Markdown, from Azure Blob Storage, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL. Please send any feedback or questions to CDB4AI@Microsoft.com.", "description": "", - "prerequisites": "Before you use this template, you need to configure an Azure Open AI instance with an embedding model. For more detailed prerequisites, see the [Azure Logic Apps project sample on GitHub - Create a Chat with Your Data](https://github.com/Azure/logicapps/tree/master/LogicApps-AI-RAG-Demo](https://github.com/Azure/logicapps/tree/master/LogicApps-AI-RAG-Demo). 
You also need to [create an Azure Cosmos DB database](https://learn.microsoft.com/azure/cosmos-db/nosql/quickstart-dotnet), and create a connection by using the keys and endpoint. Also, get more familiar with [Vector Search in Azure Cosmos DB](https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search).", + "prerequisites": "Before you use this template, you need to configure an Azure Open AI instance with an embedding model. For more detailed prerequisites, see the [Azure Logic Apps project sample on GitHub - Create a Chat with Your Data](https://github.com/Azure/logicapps/tree/master/LogicApps-AI-RAG-Demo). You also need to [create an Azure Cosmos DB database](https://learn.microsoft.com/azure/cosmos-db/nosql/quickstart-dotnet), and create a connection by using the keys and endpoint. Also, get more familiar with [Vector Search in Azure Cosmos DB](https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search).", "kinds": [ "stateful", "stateless" ], "workflows": { "default": { - "name": "ingest-document-blob-openai-cosmos" + "name": "cdb-doc-indexer-blob" } }, "artifacts": [ @@ -25,22 +25,22 @@ }, "parameters": [ { - "name": "OpenAI_TextEmbedding_Deployment_#workflowname#", - "displayName": "OpenAI text embedding deployment identifier", + "name": "AOAI_Emb_Dep_#workflowname#", + "displayName": "OpenAI embedding deployment", "type": "String", "description": "Provide the deployment model for Azure OpenAI.", "required": true }, { "name": "Blob_Path_#workflowname#", - "displayName": "Blob Storage documents path_#workflowname#", + "displayName": "Blob Storage documents path", "type": "String", "description": "Provide the path to the documents in Blob Storage.", "required": true }, { "name": "CosmosDB_Account_#workflowname#", - "displayName": "Cosmos DB Account", + "displayName": "Azure Cosmos DB account", "type": "String", "description": "Provide the name for the Azure Cosmos DB Account.", "required": true @@ -48,28 +48,28 @@ { "name": 
"CosmosDB_Database_#workflowname#", - "displayName": "Cosmos DB name", + "displayName": "Azure Cosmos DB database", "type": "String", "description": "Provide the name for the Azure Cosmos DB database.", "required": true }, { "name": "CosmosDB_Collection_#workflowname#", - "displayName": "Cosmos DB collection name", + "displayName": "Azure Cosmos DB collection", "type": "String", "description": "Provide the name for the Azure Cosmos DB collection.", "required": true }, { - "name": "CosmosDB_Vector_Path_#workflowname#", - "displayName": "Vector embeddings path or property", + "name": "CDB_Vector_#workflowname#", + "displayName": "Vector embeddings property", "type": "String", "description": "Provide the path or property for the vector embeddings in the Cosmos DB data items.", "required": true }, { - "name": "CosmosDB_Text_Path_#workflowname#", - "displayName": "Property with the document text in data items", + "name": "CDB_Text_#workflowname#", + "displayName": "Document text property", "type": "String", "description": "Provide the property that contains the document text in the Cosmos DB data items.", "required": true diff --git a/ingest-document-blob-openai-cosmos/default/workflow.json b/ingest-document-blob-openai-cosmos/default/workflow.json index 6cfd9f4..d2f0eb6 100644 --- a/ingest-document-blob-openai-cosmos/default/workflow.json +++ b/ingest-document-blob-openai-cosmos/default/workflow.json @@ -20,7 +20,7 @@ "type": "ServiceProvider", "inputs": { "parameters": { - "deploymentId": "@parameters('OpenAI_TextEmbedding_Deployment_#workflowname#')", + "deploymentId": "@parameters('AOAI_Emb_Dep_#workflowname#')", "input": "@take(body('Chunk_text')?['value'], length(body('Chunk_text')['value']))" }, "serviceProviderConfiguration": { @@ -66,9 +66,9 @@ "inputs": { "from": "@range(0, length(body('Chunk_text')['value']))\r\n", "select": { - "@{parameters('CosmosDB_Text_Path_#workflowname#')}": "@body('Chunk_text')['value'][item()]", + 
"@{parameters('CDB_Text_#workflowname#')}": "@body('Chunk_text')['value'][item()]", "documentName": "@triggerBody()?['name']", - "@{parameters('CosmosDB_Vector_Path_#workflowname#')}": "@body('Get_multiple_embeddings')['embeddings'][item()]", + "@{parameters('CDB_Vector_#workflowname#')}": "@body('Get_multiple_embeddings')['embeddings'][item()]", "sourceFile": "@triggerBody()?['name']", "fileType": "@slice(triggerBody()?['name'],-3)", "id": "@guid()", diff --git a/ingest-document-blob-openai-cosmos/manifest.json b/ingest-document-blob-openai-cosmos/manifest.json index 016a5cd..f8812ae 100644 --- a/ingest-document-blob-openai-cosmos/manifest.json +++ b/ingest-document-blob-openai-cosmos/manifest.json @@ -1,13 +1,13 @@ { "id": "ingest-document-blob-openai-cosmos", - "title": "Document ingestion from Azure Blob Storage into Azure Cosmos DB for RAG", - "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents, such as PDFs and Markdown, from Azure Blob Storage, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL.", + "title": "Document ingestion from Azure Blob Storage into Azure Cosmos DB", + "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents, such as PDFs and Markdown, from Azure Blob Storage, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL. 
Please send any feedback or questions to CDB4AI@Microsoft.com.", "skus": [ "standard" ], "workflows": { "default": { - "name": "ingest-document-blob-openai-cosmos" + "name": "cdb-doc-indexer-blob" } }, "featuredConnectors": [ diff --git a/ingest-document-sharepoint-openai-cosmos/default/manifest.json b/ingest-document-sharepoint-openai-cosmos/default/manifest.json index 86fb683..a01b5a0 100644 --- a/ingest-document-sharepoint-openai-cosmos/default/manifest.json +++ b/ingest-document-sharepoint-openai-cosmos/default/manifest.json @@ -1,12 +1,12 @@ { "id": "default", - "title": "Document ingestion from SharePoint into Azure Cosmos DB for RAG", - "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents, such as PDFs, text files, Markdown, etc. from Microsoft SharePoint, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL.", + "title": "Document ingestion from SharePoint into Azure Cosmos DB", + "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents, such as PDFs, text files, Markdown, etc. from Microsoft SharePoint, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL. Please send any feedback or questions to CDB4AI@Microsoft.com.", "description": "", - "prerequisites": "Before you use this template, you need to configure an Azure Open AI instance with an embedding model. For more detailed prerequisites, see the [Azure Logic Apps project sample on GitHub - Create a Chat with Your Data](https://github.com/Azure/logicapps/tree/master/LogicApps-AI-RAG-Demo](https://github.com/Azure/logicapps/tree/master/LogicApps-AI-RAG-Demo). 
You also need to [create an Azure Cosmos DB database](https://learn.microsoft.com/azure/cosmos-db/nosql/quickstart-dotnet), and create a connection by using the keys and endpoint. Also, get more familiar with [Vector Search in Azure Cosmos DB](https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search).", + "prerequisites": "Before you use this template, you need to configure an Azure Open AI instance with an embedding model. For more detailed prerequisites, see the [Azure Logic Apps project sample on GitHub - Create a Chat with Your Data](https://github.com/Azure/logicapps/tree/master/LogicApps-AI-RAG-Demo). You also need to [create an Azure Cosmos DB database](https://learn.microsoft.com/azure/cosmos-db/nosql/quickstart-dotnet), and create a connection by using the keys and endpoint. Also, get more familiar with [Vector Search in Azure Cosmos DB](https://learn.microsoft.com/azure/cosmos-db/nosql/vector-search).", "workflows": { "default": { - "name": "ingest-document-sharepoint-openai-cosmos" + "name": "cdb-doc-indexer-sp" } }, "kinds": [ @@ -25,22 +25,22 @@ }, "parameters": [ { - "name": "OpenAI_TextEmbedding_Deployment_#workflowname#", - "displayName": "OpenAI text embedding deployment identifier", + "name": "AOAI_Emb_Dep_#workflowname#", + "displayName": "OpenAI embedding deployment", "type": "String", "description": "Provide the deployment model for Azure OpenAI.", "required": true }, { "name": "Sharepoint_Library_Name_#workflowname#", - "displayName": "SharePoint Library Name", + "displayName": "SharePoint Library name", "type": "String", "description": "Provide the name of the SharePoint Library.", "required": true }, { "name": "Sharepoint_Site_Address_#workflowname#", - "displayName": "SharePoint Site address", + "displayName": "SharePoint site address", "type": "String", "description": "Provide the address of the SharePoint Site.", "required": true @@ -54,35 +54,35 @@ }, { "name": "CosmosDB_Account_#workflowname#", - "displayName": "Cosmos DB 
Account", + "displayName": "Azure Cosmos DB account", "type": "String", "description": "Provide the name for the Azure Cosmos DB Account.", "required": true }, { "name": "CosmosDB_Database_#workflowname#", - "displayName": "Azure Cosmos DB name", + "displayName": "Azure Cosmos DB database", "type": "String", "description": "Provide the name for the Azure Cosmos DB database.", "required": true }, { "name": "CosmosDB_Collection_#workflowname#", - "displayName": "Cosmos DB collection name", + "displayName": "Azure Cosmos DB collection", "type": "String", "description": "Provide the name for the Azure Cosmos DB collection.", "required": true }, { - "name": "CosmosDB_Vector_Path_#workflowname#", - "displayName": "Vector embeddings path or property", + "name": "CDB_Vector_#workflowname#", + "displayName": "Vector embeddings property", "type": "String", "description": "Provide the path or property for the vector embeddings in the Cosmos DB data items.", "required": true }, { - "name": "CosmosDB_Text_Path_#workflowname#", - "displayName": "Property with the document text in data items", + "name": "CDB_Text_#workflowname#", + "displayName": "Document text property", "type": "String", "description": "Provide the property that contains the document text in the Cosmos DB data items.", "required": true diff --git a/ingest-document-sharepoint-openai-cosmos/default/workflow.json b/ingest-document-sharepoint-openai-cosmos/default/workflow.json index f75ed08..3bbd139 100644 --- a/ingest-document-sharepoint-openai-cosmos/default/workflow.json +++ b/ingest-document-sharepoint-openai-cosmos/default/workflow.json @@ -20,7 +20,7 @@ "type": "ServiceProvider", "inputs": { "parameters": { - "deploymentId": "@parameters('OpenAI_TextEmbedding_Deployment_#workflowname#')", + "deploymentId": "@parameters('AOAI_Emb_Dep_#workflowname#')", "input": "@take(body('Chunk_text')?['value'], length(body('Chunk_text')['value']))" }, "serviceProviderConfiguration": { @@ -72,9 +72,9 @@ "inputs": { "from": 
"@range(0, length(body('Chunk_text')['value']))\r\n", "select": { - "@{parameters('CosmosDB_Text_Path_#workflowname#')}": "@{body('Chunk_text')['value'][item()]}", + "@{parameters('CDB_Text_#workflowname#')}": "@{body('Chunk_text')['value'][item()]}", "documentName": "@{triggerBody()?['{FilenameWithExtension}']}", - "@{parameters('CosmosDB_Vector_Path_#workflowname#')}": "@body('Get_multiple_embeddings')['embeddings'][item()]", + "@{parameters('CDB_Vector_#workflowname#')}": "@body('Get_multiple_embeddings')['embeddings'][item()]", "fileLink": "@{triggerBody()?['{Link}']}", "id": "@{guid()}", "chunkNumber": "@{add(item(),1)}", diff --git a/ingest-document-sharepoint-openai-cosmos/manifest.json b/ingest-document-sharepoint-openai-cosmos/manifest.json index b0ab749..069cc57 100644 --- a/ingest-document-sharepoint-openai-cosmos/manifest.json +++ b/ingest-document-sharepoint-openai-cosmos/manifest.json @@ -1,13 +1,13 @@ { "id": "ingest-document-sharepoint-openai-cosmos", - "title": "Document ingestion from SharePoint into Azure Cosmos DB for RAG", - "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents, such as PDFs, text files, Markdown, etc. from Microsoft SharePoint, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL.", + "title": "Document ingestion from SharePoint into Azure Cosmos DB", + "summary": "To support search and retrieval purposes for the retrieval-augmented generation (RAG) pattern, this workflow reads documents, such as PDFs, text files, Markdown, etc. from Microsoft SharePoint, chunks the text, creates vector embeddings, formats the resulting data into a JSON document, and inserts that content into Azure Cosmos DB for NoSQL. 
Please send any feedback or questions to CDB4AI@Microsoft.com.", "skus": [ "standard" ], "workflows": { "default": { - "name": "ingest-document-sharepoint-openai-cosmos" + "name": "cdb-doc-indexer-sp" } }, "featuredConnectors": [