From ee7bda5394937f52f121c8493700d41f61679b2a Mon Sep 17 00:00:00 2001 From: Tony Tang Date: Fri, 9 May 2025 15:25:04 -0700 Subject: [PATCH] Update token size for AI Search templates --- vectorize-amazons3-aisearch-request/default/workflow.json | 4 ++-- vectorize-amazons3-aisearch-schedule/default/workflow.json | 4 ++-- vectorize-azurefile-aisearch-request/default/workflow.json | 4 ++-- vectorize-azurefile-aisearch-schedule/default/workflow.json | 4 ++-- vectorize-azurequeue-aisearch-request/default/workflow.json | 4 ++-- vectorize-azurequeue-aisearch-schedule/default/workflow.json | 4 ++-- vectorize-dropbox-aisearch-request/default/workflow.json | 4 ++-- vectorize-dropbox-aisearch-schedule/default/workflow.json | 4 ++-- vectorize-onedrive-aisearch-request/default/workflow.json | 4 ++-- vectorize-onedrive-aisearch-schedule/default/workflow.json | 4 ++-- .../default/workflow.json | 4 ++-- .../default/workflow.json | 4 ++-- vectorize-servicebus-aisearch-schedule/default/workflow.json | 4 ++-- vectorize-sftp-aisearch-request/default/workflow.json | 4 ++-- vectorize-sftp-aisearch-schedule/default/workflow.json | 4 ++-- vectorize-sharepoint-aisearch-request/default/workflow.json | 4 ++-- vectorize-sharepoint-aisearch-schedule/default/workflow.json | 4 ++-- 17 files changed, 34 insertions(+), 34 deletions(-) diff --git a/vectorize-amazons3-aisearch-request/default/workflow.json b/vectorize-amazons3-aisearch-request/default/workflow.json index 606f39b..37608e4 100644 --- a/vectorize-amazons3-aisearch-request/default/workflow.json +++ b/vectorize-amazons3-aisearch-request/default/workflow.json @@ -75,8 +75,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Get_multiple_embeddings": { diff --git a/vectorize-amazons3-aisearch-schedule/default/workflow.json b/vectorize-amazons3-aisearch-schedule/default/workflow.json index 6ac3c0a..c1ef39b 100644 --- a/vectorize-amazons3-aisearch-schedule/default/workflow.json +++ b/vectorize-amazons3-aisearch-schedule/default/workflow.json @@ -129,8 +129,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Get_multiple_embeddings": { diff --git a/vectorize-azurefile-aisearch-request/default/workflow.json b/vectorize-azurefile-aisearch-request/default/workflow.json index a5f5e9b..556d8c8 100644 --- a/vectorize-azurefile-aisearch-request/default/workflow.json +++ b/vectorize-azurefile-aisearch-request/default/workflow.json @@ -33,8 +33,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Get_multiple_embeddings": { diff --git a/vectorize-azurefile-aisearch-schedule/default/workflow.json b/vectorize-azurefile-aisearch-schedule/default/workflow.json index 28f7ffe..c0413c1 100644 --- a/vectorize-azurefile-aisearch-schedule/default/workflow.json +++ b/vectorize-azurefile-aisearch-schedule/default/workflow.json @@ -129,8 +129,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Get_multiple_embeddings": { diff --git a/vectorize-azurequeue-aisearch-request/default/workflow.json b/vectorize-azurequeue-aisearch-request/default/workflow.json index 977889b..b44425b 100644 --- a/vectorize-azurequeue-aisearch-request/default/workflow.json +++ b/vectorize-azurequeue-aisearch-request/default/workflow.json @@ -28,8 +28,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Get_multiple_embeddings": { diff --git a/vectorize-azurequeue-aisearch-schedule/default/workflow.json b/vectorize-azurequeue-aisearch-schedule/default/workflow.json index 3c96e0f..a478aa2 100644 --- a/vectorize-azurequeue-aisearch-schedule/default/workflow.json +++ b/vectorize-azurequeue-aisearch-schedule/default/workflow.json @@ -97,8 +97,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Get_multiple_embeddings": { diff --git a/vectorize-dropbox-aisearch-request/default/workflow.json b/vectorize-dropbox-aisearch-request/default/workflow.json index 84cc27b..632d9bf 100644 --- a/vectorize-dropbox-aisearch-request/default/workflow.json +++ b/vectorize-dropbox-aisearch-request/default/workflow.json @@ -33,8 +33,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Get_multiple_embeddings": { diff --git a/vectorize-dropbox-aisearch-schedule/default/workflow.json b/vectorize-dropbox-aisearch-schedule/default/workflow.json index 649589f..5e60ad0 100644 --- a/vectorize-dropbox-aisearch-schedule/default/workflow.json +++ b/vectorize-dropbox-aisearch-schedule/default/workflow.json @@ -42,8 +42,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Get_multiple_embeddings": { diff --git a/vectorize-onedrive-aisearch-request/default/workflow.json b/vectorize-onedrive-aisearch-request/default/workflow.json index 8e09c53..ece655a 100644 --- a/vectorize-onedrive-aisearch-request/default/workflow.json +++ b/vectorize-onedrive-aisearch-request/default/workflow.json @@ -98,8 +98,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Get_multiple_embeddings": { diff --git a/vectorize-onedrive-aisearch-schedule/default/workflow.json b/vectorize-onedrive-aisearch-schedule/default/workflow.json index fb731c8..eb74af5 100644 --- a/vectorize-onedrive-aisearch-schedule/default/workflow.json +++ b/vectorize-onedrive-aisearch-schedule/default/workflow.json @@ -148,8 +148,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Parse_a_document": { diff --git a/vectorize-onedriveforbusiness-aisearch-request/default/workflow.json b/vectorize-onedriveforbusiness-aisearch-request/default/workflow.json index de7ae82..b821adf 100644 --- a/vectorize-onedriveforbusiness-aisearch-request/default/workflow.json +++ b/vectorize-onedriveforbusiness-aisearch-request/default/workflow.json @@ -98,8 +98,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Get_multiple_embeddings": { diff --git a/vectorize-onedriveforbusiness-aisearch-schedule/default/workflow.json b/vectorize-onedriveforbusiness-aisearch-schedule/default/workflow.json index 7a3478c..a7a5b6a 100644 --- a/vectorize-onedriveforbusiness-aisearch-schedule/default/workflow.json +++ b/vectorize-onedriveforbusiness-aisearch-schedule/default/workflow.json @@ -148,8 +148,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Parse_a_document": { diff --git a/vectorize-servicebus-aisearch-schedule/default/workflow.json b/vectorize-servicebus-aisearch-schedule/default/workflow.json index f62790a..49c7c48 100644 --- a/vectorize-servicebus-aisearch-schedule/default/workflow.json +++ b/vectorize-servicebus-aisearch-schedule/default/workflow.json @@ -73,8 +73,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 }, "runAfter": { "Parse_a_document": [ diff --git a/vectorize-sftp-aisearch-request/default/workflow.json b/vectorize-sftp-aisearch-request/default/workflow.json index 9a3006a..40a9b7b 100644 --- a/vectorize-sftp-aisearch-request/default/workflow.json +++ b/vectorize-sftp-aisearch-request/default/workflow.json @@ -33,8 +33,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Get_multiple_embeddings": { diff --git a/vectorize-sftp-aisearch-schedule/default/workflow.json b/vectorize-sftp-aisearch-schedule/default/workflow.json index 73b04ee..417c911 100644 --- a/vectorize-sftp-aisearch-schedule/default/workflow.json +++ b/vectorize-sftp-aisearch-schedule/default/workflow.json @@ -42,8 +42,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Get_multiple_embeddings": { diff --git a/vectorize-sharepoint-aisearch-request/default/workflow.json b/vectorize-sharepoint-aisearch-request/default/workflow.json index 933c87e..cf1d06c 100644 --- a/vectorize-sharepoint-aisearch-request/default/workflow.json +++ b/vectorize-sharepoint-aisearch-request/default/workflow.json @@ -63,8 +63,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Azure_OpenAI_-_Get_multiple_embeddings": { diff --git a/vectorize-sharepoint-aisearch-schedule/default/workflow.json b/vectorize-sharepoint-aisearch-schedule/default/workflow.json index 1778892..2bcae01 100644 --- a/vectorize-sharepoint-aisearch-schedule/default/workflow.json +++ b/vectorize-sharepoint-aisearch-schedule/default/workflow.json @@ -60,8 +60,8 @@ "chunkingStrategy": "TokenSize", "text": "@body('Parse_a_document')?['text']", "encodingModel": "cl100k_base", - "tokenSize": 5000, - "pageOverlapLength": 0 + "tokenSize": 512, + "pageOverlapLength": 100 } }, "Azure_OpenAI_-_Get_multiple_embeddings": {