1313from learning_resources .models import LearningResource
1414from learning_resources_search .connection import (
1515 get_default_alias_name ,
16+ get_vector_model_id ,
1617)
1718from learning_resources_search .constants import (
1819 COMBINED_INDEX ,
1920 CONTENT_FILE_TYPE ,
2021 COURSE_QUERY_FIELDS ,
2122 COURSE_TYPE ,
2223 DEPARTMENT_QUERY_FIELDS ,
24+ HYBRID_SEARCH_MODE ,
2325 LEARNING_RESOURCE ,
2426 LEARNING_RESOURCE_QUERY_FIELDS ,
2527 LEARNING_RESOURCE_SEARCH_SORTBY_OPTIONS ,
@@ -63,7 +65,7 @@ def gen_content_file_id(content_file_id):
6365 return f"cf_{ content_file_id } "
6466
6567
66- def relevant_indexes (resource_types , aggregations , endpoint , use_hybrid_search = False ):
68+ def relevant_indexes (resource_types , aggregations , endpoint , use_hybrid_search ):
6769 """
6870 Return list of relevant index types for the query
6971
@@ -143,7 +145,11 @@ def generate_sort_clause(search_params):
143145 return sort
144146
145147
146- def wrap_text_clause (text_query , min_score = None , use_hybrid_search = False ):
148+ def wrap_text_clause (
149+ text_query ,
150+ use_hybrid_search ,
151+ min_score = None ,
152+ ):
147153 """
148154 Wrap the text subqueries in a bool query
149155 Shared by generate_content_file_text_clause and
@@ -209,11 +215,11 @@ def generate_content_file_text_clause(text):
209215 else :
210216 text_query = {}
211217
212- return wrap_text_clause (text_query )
218+ return wrap_text_clause (text_query , use_hybrid_search = False )
213219
214220
215221def generate_learning_resources_text_clause (
216- text , search_mode , slop , content_file_score_weight , min_score , use_hybrid_search
222+ text , search_mode , slop , content_file_score_weight , min_score
217223):
218224 """
219225 Return text clause for the query
@@ -224,16 +230,23 @@ def generate_learning_resources_text_clause(
224230 dict: dictionary with the opensearch text clause
225231 """
226232
233+ use_hybrid_search = search_mode == HYBRID_SEARCH_MODE
234+
227235 query_type = (
228236 "query_string" if text .startswith ('"' ) and text .endswith ('"' ) else "multi_match"
229237 )
230238
231239 extra_params = {}
232240
233- if query_type == "multi_match" and search_mode :
234- extra_params ["type" ] = search_mode
241+ if use_hybrid_search :
242+ text_search_mode = settings .DEFAULT_SEARCH_MODE
243+ else :
244+ text_search_mode = search_mode
245+
246+ if query_type == "multi_match" :
247+ extra_params ["type" ] = text_search_mode
235248
236- if search_mode == "phrase" and slop :
249+ if text_search_mode == "phrase" and slop :
237250 extra_params ["slop" ] = slop
238251
239252 if content_file_score_weight is not None :
@@ -337,7 +350,7 @@ def generate_learning_resources_text_clause(
337350 else :
338351 text_query = {}
339352
340- return wrap_text_clause (text_query , min_score , use_hybrid_search )
353+ return wrap_text_clause (text_query , use_hybrid_search , min_score )
341354
342355
343356def generate_filter_clause (
@@ -587,7 +600,6 @@ def add_text_query_to_search(
587600 search_params .get ("slop" ),
588601 search_params .get ("content_file_score_weight" ),
589602 search_params .get ("min_score" ),
590- use_hybrid_search ,
591603 )
592604
593605 yearly_decay_percent = search_params .get ("yearly_decay_percent" )
@@ -637,11 +649,17 @@ def add_text_query_to_search(
637649 text_query = {"bool" : {"must" : [text_query ], "filter" : query_type_query }}
638650
639651 if use_hybrid_search :
652+ vector_model_id = get_vector_model_id ()
653+ if not vector_model_id :
654+ log .error ("Vector model not found. Cannot perform hybrid search." )
655+ error_message = "Vector model not found."
656+ raise ValueError (error_message )
657+
640658 vector_query_description = {
641659 "neural" : {
642660 "description_embedding" : {
643661 "query_text" : text ,
644- "model_id" : "PQBFF5oBDk6_T5cL_Izk" ,
662+ "model_id" : vector_model_id ,
645663 "min_score" : 0.015 ,
646664 },
647665 }
@@ -651,7 +669,7 @@ def add_text_query_to_search(
651669 "neural" : {
652670 "title_embedding" : {
653671 "query_text" : text ,
654- "model_id" : "PQBFF5oBDk6_T5cL_Izk" ,
672+ "model_id" : vector_model_id ,
655673 "min_score" : 0.015 ,
656674 },
657675 }
@@ -675,7 +693,7 @@ def add_text_query_to_search(
675693 return search
676694
677695
678- def construct_search (search_params ):
696+ def construct_search (search_params ): # noqa: C901
679697 """
680698 Construct a learning resources search based on the query
681699
@@ -694,7 +712,7 @@ def construct_search(search_params):
694712 ):
695713 search_params ["resource_type" ] = list (LEARNING_RESOURCE_TYPES )
696714
697- use_hybrid_search = search_params .get ("use_hybrid_search" , False )
715+ use_hybrid_search = search_params .get ("search_mode" ) == HYBRID_SEARCH_MODE
698716
699717 indexes = relevant_indexes (
700718 search_params .get ("resource_type" ),
@@ -706,7 +724,7 @@ def construct_search(search_params):
706724 search = Search (index = "," .join (indexes ))
707725
708726 search = search .source (fields = {"excludes" : SOURCE_EXCLUDED_FIELDS })
709- if not search_params . get ( " use_hybrid_search" ) :
727+ if not use_hybrid_search :
710728 search = search .params (search_type = "dfs_query_then_fetch" )
711729 if search_params .get ("offset" ):
712730 search = search .extra (from_ = search_params .get ("offset" ))
@@ -763,12 +781,12 @@ def execute_learn_search(search_params):
763781 Returns:
764782 dict: The opensearch response dict
765783 """
766- print (search_params )
767784 if search_params .get ("endpoint" ) != CONTENT_FILE_TYPE :
768785 if search_params .get ("yearly_decay_percent" ) is None :
769786 search_params ["yearly_decay_percent" ] = (
770787 settings .DEFAULT_SEARCH_STALENESS_PENALTY
771788 )
789+
772790 if search_params .get ("search_mode" ) is None :
773791 search_params ["search_mode" ] = settings .DEFAULT_SEARCH_MODE
774792 if search_params .get ("slop" ) is None :
@@ -781,7 +799,7 @@ def execute_learn_search(search_params):
781799 )
782800 search = construct_search (search_params )
783801
784- if search_params .get ("use_hybrid_search" ) :
802+ if search_params .get ("search_mode" ) == HYBRID_SEARCH_MODE :
785803 search = search .extra (
786804 search_pipeline = {
787805 "description" : "Post processor for hybrid search" ,
@@ -799,7 +817,6 @@ def execute_learn_search(search_params):
799817 }
800818 )
801819
802- print (search .to_dict ())
803820 return search .execute ().to_dict ()
804821
805822
@@ -915,7 +932,9 @@ def get_similar_topics(
915932 list of str:
916933 list of topic values
917934 """
918- indexes = relevant_indexes ([COURSE_TYPE ], [], endpoint = LEARNING_RESOURCE )
935+ indexes = relevant_indexes (
936+ [COURSE_TYPE ], [], endpoint = LEARNING_RESOURCE , use_hybrid_search = False
937+ )
919938 search = Search (index = "," .join (indexes ))
920939 search = search .filter ("term" , resource_type = COURSE_TYPE )
921940 search = search .query (
@@ -1053,7 +1072,9 @@ def get_similar_resources_opensearch(
10531072 list of str:
10541073 list of learning resources
10551074 """
1056- indexes = relevant_indexes (LEARNING_RESOURCE_TYPES , [], endpoint = LEARNING_RESOURCE )
1075+ indexes = relevant_indexes (
1076+ LEARNING_RESOURCE_TYPES , [], endpoint = LEARNING_RESOURCE , use_hybrid_search = False
1077+ )
10571078 search = Search (index = "," .join (indexes ))
10581079 if num_resources :
10591080 # adding +1 to num_resources since we filter out existing resource.id
0 commit comments