Skip to content

Commit 287941c

Browse files
Addressed comments
1 parent fe18943 commit 287941c

File tree

1 file changed

+172
-10
lines changed

1 file changed

+172
-10
lines changed

huggingface/query_based/hugging_face.ipynb

Lines changed: 172 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -771,10 +771,35 @@
771771
},
772772
{
773773
"cell_type": "code",
774-
"execution_count": null,
774+
"execution_count": 8,
775775
"id": "fd30f9c9",
776776
"metadata": {},
777-
"outputs": [],
777+
"outputs": [
778+
{
779+
"name": "stdout",
780+
"output_type": "stream",
781+
"text": [
782+
"Testing baseline performance without Hyperscale optimization...\n",
783+
"\n",
784+
"=== PHASE 1: BASELINE VECTOR SEARCH ===\n",
785+
"Query: \"What are the key features of a scalable NoSQL database?\"\n",
786+
"Search Time: 0.1484 seconds\n",
787+
"Results Found: 3 documents\n",
788+
"\n",
789+
"[Result 1]\n",
790+
"Vector Distance: 0.586197 (lower = more similar)\n",
791+
"Document Content: Couchbase Server is a multipurpose, distributed database that fuses the strengths of relational databases such as SQL and ACID transactions with JSON's versatility, with a foundation that is extremely fast and scalable.\n",
792+
"\n",
793+
"[Result 2]\n",
794+
"Vector Distance: 0.645435 (lower = more similar)\n",
795+
"Document Content: It's used across industries for things like user profiles, dynamic product catalogs, GenAI apps, vector search, high-speed caching, and much more.\n",
796+
"\n",
797+
"[Result 3]\n",
798+
"Vector Distance: 0.976888 (lower = more similar)\n",
799+
"Document Content: this is a sample text with the data \"hello\"\n"
800+
]
801+
}
802+
],
778803
"source": [
779804
"test_query = \"What are the key features of a scalable NoSQL database?\"\n",
780805
"print(\"Testing baseline performance without Hyperscale optimization...\")\n",
@@ -801,10 +826,39 @@
801826
},
802827
{
803828
"cell_type": "code",
804-
"execution_count": null,
829+
"execution_count": 9,
805830
"id": "9e2fa28e",
806831
"metadata": {},
807-
"outputs": [],
832+
"outputs": [
833+
{
834+
"name": "stdout",
835+
"output_type": "stream",
836+
"text": [
837+
"Creating Hyperscale Vector Index...\n",
838+
"✓ Hyperscale Vector Index created successfully!\n",
839+
"Waiting for index to become available...\n",
840+
"\n",
841+
"Testing performance with Hyperscale optimization...\n",
842+
"\n",
843+
"=== PHASE 2: OPTIMIZED SEARCH ===\n",
844+
"Query: \"What are the key features of a scalable NoSQL database?\"\n",
845+
"Search Time: 0.0848 seconds\n",
846+
"Results Found: 3 documents\n",
847+
"\n",
848+
"[Result 1]\n",
849+
"Vector Distance: 0.586197 (lower = more similar)\n",
850+
"Document Content: Couchbase Server is a multipurpose, distributed database that fuses the strengths of relational databases such as SQL and ACID transactions with JSON's versatility, with a foundation that is extremely fast and scalable.\n",
851+
"\n",
852+
"[Result 2]\n",
853+
"Vector Distance: 0.645435 (lower = more similar)\n",
854+
"Document Content: It's used across industries for things like user profiles, dynamic product catalogs, GenAI apps, vector search, high-speed caching, and much more.\n",
855+
"\n",
856+
"[Result 3]\n",
857+
"Vector Distance: 0.976888 (lower = more similar)\n",
858+
"Document Content: this is a sample text with the data \"hello\"\n"
859+
]
860+
}
861+
],
808862
"source": [
809863
"# Create Hyperscale index for optimized vector search\n",
810864
"print(\"Creating Hyperscale Vector Index...\")\n",
@@ -852,10 +906,50 @@
852906
},
853907
{
854908
"cell_type": "code",
855-
"execution_count": null,
909+
"execution_count": 10,
856910
"id": "d52edb51",
857911
"metadata": {},
858-
"outputs": [],
912+
"outputs": [
913+
{
914+
"name": "stdout",
915+
"output_type": "stream",
916+
"text": [
917+
"Setting up Couchbase cache for improved performance on repeated queries...\n",
918+
"✓ Couchbase cache enabled!\n",
919+
"\n",
920+
"Testing cache benefits with a different query...\n",
921+
"First execution (cache miss):\n",
922+
"\n",
923+
"=== PHASE 3A: FIRST QUERY (CACHE MISS) ===\n",
924+
"Query: \"How does a distributed database handle high-speed operations?\"\n",
925+
"Search Time: 0.1024 seconds\n",
926+
"Results Found: 2 documents\n",
927+
"\n",
928+
"[Result 1]\n",
929+
"Vector Distance: 0.632770 (lower = more similar)\n",
930+
"Document Content: Couchbase Server is a multipurpose, distributed database that fuses the strengths of relational databases such as SQL and ACID transactions with JSON's versatility, with a foundation that is extremely fast and scalable.\n",
931+
"\n",
932+
"[Result 2]\n",
933+
"Vector Distance: 0.677951 (lower = more similar)\n",
934+
"Document Content: It's used across industries for things like user profiles, dynamic product catalogs, GenAI apps, vector search, high-speed caching, and much more.\n",
935+
"\n",
936+
"Second execution (cache hit):\n",
937+
"\n",
938+
"=== PHASE 3B: REPEATED QUERY (CACHE HIT) ===\n",
939+
"Query: \"How does a distributed database handle high-speed operations?\"\n",
940+
"Search Time: 0.0289 seconds\n",
941+
"Results Found: 2 documents\n",
942+
"\n",
943+
"[Result 1]\n",
944+
"Vector Distance: 0.632770 (lower = more similar)\n",
945+
"Document Content: Couchbase Server is a multipurpose, distributed database that fuses the strengths of relational databases such as SQL and ACID transactions with JSON's versatility, with a foundation that is extremely fast and scalable.\n",
946+
"\n",
947+
"[Result 2]\n",
948+
"Vector Distance: 0.677951 (lower = more similar)\n",
949+
"Document Content: It's used across industries for things like user profiles, dynamic product catalogs, GenAI apps, vector search, high-speed caching, and much more.\n"
950+
]
951+
}
952+
],
859953
"source": [
860954
"# Set up Couchbase cache (can be applied to any search approach)\n",
861955
"print(\"Setting up Couchbase cache for improved performance on repeated queries...\")\n",
@@ -901,10 +995,38 @@
901995
},
902996
{
903997
"cell_type": "code",
904-
"execution_count": null,
998+
"execution_count": 11,
905999
"id": "640a1bbe",
9061000
"metadata": {},
907-
"outputs": [],
1001+
"outputs": [
1002+
{
1003+
"name": "stdout",
1004+
"output_type": "stream",
1005+
"text": [
1006+
"\n",
1007+
"================================================================================\n",
1008+
"VECTOR SEARCH PERFORMANCE OPTIMIZATION SUMMARY\n",
1009+
"================================================================================\n",
1010+
"Phase 1 - Baseline (Raw Search): 0.1484 seconds\n",
1011+
"Phase 2 - Optimized Search: 0.0848 seconds\n",
1012+
"Phase 3 - Cache Benefits:\n",
1013+
" First execution (cache miss): 0.1024 seconds\n",
1014+
" Second execution (cache hit): 0.0289 seconds\n",
1015+
"\n",
1016+
"--------------------------------------------------------------------------------\n",
1017+
"OPTIMIZATION IMPACT ANALYSIS:\n",
1018+
"--------------------------------------------------------------------------------\n",
1019+
"Vector Index Benefit: 1.75x faster (42.8% improvement)\n",
1020+
"Cache Benefit: 3.55x faster (71.8% improvement)\n",
1021+
"\n",
1022+
"Key Insights:\n",
1023+
"• Hyperscale optimization provides consistent performance benefits, especially with larger datasets\n",
1024+
"• Caching benefits apply to both raw and optimized searches\n",
1025+
"• Combined Hyperscale + Cache provides the best performance for production applications\n",
1026+
"• Hyperscale indexes scale to billions of vectors with optimized concurrent operations\n"
1027+
]
1028+
}
1029+
],
9081030
"source": [
9091031
"print(\"\\n\" + \"=\"*80)\n",
9101032
"print(\"VECTOR SEARCH PERFORMANCE OPTIMIZATION SUMMARY\")\n",
@@ -961,10 +1083,50 @@
9611083
},
9621084
{
9631085
"cell_type": "code",
964-
"execution_count": null,
1086+
"execution_count": 12,
9651087
"id": "26b9d9f4",
9661088
"metadata": {},
967-
"outputs": [],
1089+
"outputs": [
1090+
{
1091+
"name": "stdout",
1092+
"output_type": "stream",
1093+
"text": [
1094+
"\n",
1095+
"=== INTERACTIVE OPTIMIZED SEARCH ===\n",
1096+
"Query: \"What is the sample data?\"\n",
1097+
"Search Time: 0.0812 seconds\n",
1098+
"Results Found: 3 documents\n",
1099+
"\n",
1100+
"[Result 1]\n",
1101+
"Vector Distance: 0.623644 (lower = more similar)\n",
1102+
"Document Content: this is a sample text with the data \"hello\"\n",
1103+
"\n",
1104+
"[Result 2]\n",
1105+
"Vector Distance: 0.860599 (lower = more similar)\n",
1106+
"Document Content: It's used across industries for things like user profiles, dynamic product catalogs, GenAI apps, vector search, high-speed caching, and much more.\n",
1107+
"\n",
1108+
"[Result 3]\n",
1109+
"Vector Distance: 0.909207 (lower = more similar)\n",
1110+
"Document Content: Couchbase Server is a multipurpose, distributed database that fuses the strengths of relational databases such as SQL and ACID transactions with JSON's versatility, with a foundation that is extremely fast and scalable.\n"
1111+
]
1112+
},
1113+
{
1114+
"data": {
1115+
"text/plain": [
1116+
"(0.08118820190429688,\n",
1117+
" [(Document(id='e20a8dcd8b464e8e819b87c9a0ff05c3', metadata={}, page_content='this is a sample text with the data \"hello\"'),\n",
1118+
" 0.6236441411684932),\n",
1119+
" (Document(id='0442f351aec2415481138315d492ee80', metadata={}, page_content='It's used across industries for things like user profiles, dynamic product catalogs, GenAI apps, vector search, high-speed caching, and much more.'),\n",
1120+
" 0.8605992009935179),\n",
1121+
" (Document(id='7c601881e4bf4c53b5b4c2a25628d904', metadata={}, page_content='Couchbase Server is a multipurpose, distributed database that fuses the strengths of relational databases such as SQL and ACID transactions with JSON's versatility, with a foundation that is extremely fast and scalable.'),\n",
1122+
" 0.9092065785676496)])"
1123+
]
1124+
},
1125+
"execution_count": 12,
1126+
"metadata": {},
1127+
"output_type": "execute_result"
1128+
}
1129+
],
9681130
"source": [
9691131
"custom_query = input(\"Enter your search query: \")\n",
9701132
"search_with_performance_metrics(custom_query, \"Interactive Optimized Search\")\n"

0 commit comments

Comments
 (0)